From 13508a9d888df519a389b6bd187b5f745772627b Mon Sep 17 00:00:00 2001 From: Upendra Rao Vedullapalli Date: Wed, 4 Oct 2023 15:20:51 +0200 Subject: [PATCH 01/98] feat(bigquery): excluding projects without any datasets from ingestion (#8535) Co-authored-by: Upendra Vedullapalli Co-authored-by: Andrew Sikowitz --- .../ingestion/source/bigquery_v2/bigquery.py | 19 +++++-- .../source/bigquery_v2/bigquery_config.py | 5 ++ .../source/bigquery_v2/bigquery_report.py | 2 + .../tests/unit/test_bigquery_source.py | 53 ++++++++++++++++++- 4 files changed, 72 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index f6adbcf033bcc..fee181864a2d6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -600,9 +600,6 @@ def _process_project( db_views: Dict[str, List[BigqueryView]] = {} project_id = bigquery_project.id - - yield from self.gen_project_id_containers(project_id) - try: bigquery_project.datasets = ( self.bigquery_data_dictionary.get_datasets_for_project_id(project_id) @@ -619,11 +616,23 @@ def _process_project( return None if len(bigquery_project.datasets) == 0: - logger.warning( - f"No dataset found in {project_id}. Either there are no datasets in this project or missing bigquery.datasets.get permission. You can assign predefined roles/bigquery.metadataViewer role to your service account." + more_info = ( + "Either there are no datasets in this project or missing bigquery.datasets.get permission. " + "You can assign predefined roles/bigquery.metadataViewer role to your service account." ) + if self.config.exclude_empty_projects: + self.report.report_dropped(project_id) + warning_message = f"Excluded project '{project_id}' since no were datasets found. {more_info}" + else: + yield from self.gen_project_id_containers(project_id) + warning_message = ( + f"No datasets found in project '{project_id}'. {more_info}" + ) + logger.warning(warning_message) return + yield from self.gen_project_id_containers(project_id) + self.report.num_project_datasets_to_scan[project_id] = len( bigquery_project.datasets ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 3b06a4699c566..483355a85ac05 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -265,6 +265,11 @@ def validate_column_lineage(cls, v: bool, values: Dict[str, Any]) -> bool: description="Maximum number of entries for the in-memory caches of FileBacked data structures.", ) + exclude_empty_projects: bool = Field( + default=False, + description="Option to exclude empty projects from being ingested.", + ) + @root_validator(pre=False) def profile_default_settings(cls, values: Dict) -> Dict: # Extra default SQLAlchemy option for better connection pooling and threading. 
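For reference, a minimal sketch of how the new `exclude_empty_projects` flag is exercised; the project ID below is a placeholder, and the assertions mirror the unit test added later in this patch:

```python
from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config

# Default: a project with zero datasets still gets its project container emitted,
# along with a warning about missing datasets or permissions.
config = BigQueryV2Config.parse_obj({"project_ids": ["placeholder-project"]})
assert config.exclude_empty_projects is False

# Opt in: projects that return no datasets are counted via report_dropped() and no
# container work units are generated for them.
config = BigQueryV2Config.parse_obj(
    {"project_ids": ["placeholder-project"], "exclude_empty_projects": True}
)
assert config.exclude_empty_projects
```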
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index 661589a0c58e5..9d92b011ee285 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -122,6 +122,8 @@ class BigQueryV2Report(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowR usage_state_size: Optional[str] = None + exclude_empty_projects: Optional[bool] = None + schema_api_perf: BigQuerySchemaApiPerfReport = field( default_factory=BigQuerySchemaApiPerfReport ) diff --git a/metadata-ingestion/tests/unit/test_bigquery_source.py b/metadata-ingestion/tests/unit/test_bigquery_source.py index 4fc6c31626ba8..e9e91361f49f4 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_source.py +++ b/metadata-ingestion/tests/unit/test_bigquery_source.py @@ -3,13 +3,14 @@ import os from datetime import datetime, timedelta, timezone from types import SimpleNamespace -from typing import Any, Dict, Optional, cast +from typing import Any, Dict, List, Optional, cast from unittest.mock import MagicMock, Mock, patch import pytest from google.api_core.exceptions import GoogleAPICallError from google.cloud.bigquery.table import Row, TableListItem +from datahub.configuration.common import AllowDenyPattern from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.bigquery_v2.bigquery import BigqueryV2Source from datahub.ingestion.source.bigquery_v2.bigquery_audit import ( @@ -17,9 +18,13 @@ BigqueryTableIdentifier, BigQueryTableRef, ) -from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config +from datahub.ingestion.source.bigquery_v2.bigquery_config import ( + BigQueryConnectionConfig, + BigQueryV2Config, +) from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report from datahub.ingestion.source.bigquery_v2.bigquery_schema import ( + BigqueryDataset, BigqueryProject, BigQuerySchemaApi, BigqueryView, @@ -854,3 +859,47 @@ def test_get_table_name(full_table_name: str, datahub_full_table_name: str) -> N BigqueryTableIdentifier.from_string_name(full_table_name).get_table_name() == datahub_full_table_name ) + + +def test_default_config_for_excluding_projects_and_datasets(): + config = BigQueryV2Config.parse_obj({}) + assert config.exclude_empty_projects is False + config = BigQueryV2Config.parse_obj({"exclude_empty_projects": True}) + assert config.exclude_empty_projects + + +@patch.object(BigQueryConnectionConfig, "get_bigquery_client", new=lambda self: None) +@patch.object(BigQuerySchemaApi, "get_datasets_for_project_id") +def test_excluding_empty_projects_from_ingestion( + get_datasets_for_project_id_mock, +): + project_id_with_datasets = "project-id-with-datasets" + project_id_without_datasets = "project-id-without-datasets" + + def get_datasets_for_project_id_side_effect( + project_id: str, + ) -> List[BigqueryDataset]: + return ( + [] + if project_id == project_id_without_datasets + else [BigqueryDataset("some-dataset")] + ) + + get_datasets_for_project_id_mock.side_effect = ( + get_datasets_for_project_id_side_effect + ) + + base_config = { + "project_ids": [project_id_with_datasets, project_id_without_datasets], + "schema_pattern": AllowDenyPattern(deny=[".*"]), + "include_usage_statistics": False, + "include_table_lineage": False, + } + + config = BigQueryV2Config.parse_obj(base_config) + source = BigqueryV2Source(config=config, 
ctx=PipelineContext(run_id="test-1")) + assert len({wu.metadata.entityUrn for wu in source.get_workunits()}) == 2 # type: ignore + + config = BigQueryV2Config.parse_obj({**base_config, "exclude_empty_projects": True}) + source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test-2")) + assert len({wu.metadata.entityUrn for wu in source.get_workunits()}) == 1 # type: ignore From d3346a04e486fa098129b626e61013cab4f69350 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 4 Oct 2023 10:22:45 -0400 Subject: [PATCH 02/98] feat(ingest/unity): Ingest notebooks and their lineage (#8940) --- .../sources/databricks/unity-catalog_pre.md | 1 + metadata-ingestion/setup.py | 2 +- .../src/datahub/emitter/mcp_builder.py | 12 ++ .../ingestion/source/common/subtypes.py | 3 + .../datahub/ingestion/source/unity/config.py | 9 +- .../datahub/ingestion/source/unity/proxy.py | 89 +++++++---- .../ingestion/source/unity/proxy_types.py | 45 +++++- .../datahub/ingestion/source/unity/report.py | 8 +- .../datahub/ingestion/source/unity/source.py | 148 ++++++++++++++---- .../datahub/ingestion/source/unity/usage.py | 12 +- 10 files changed, 257 insertions(+), 72 deletions(-) diff --git a/metadata-ingestion/docs/sources/databricks/unity-catalog_pre.md b/metadata-ingestion/docs/sources/databricks/unity-catalog_pre.md index 2be8846b87bea..ae2883343d7e8 100644 --- a/metadata-ingestion/docs/sources/databricks/unity-catalog_pre.md +++ b/metadata-ingestion/docs/sources/databricks/unity-catalog_pre.md @@ -13,6 +13,7 @@ * Ownership of or `SELECT` privilege on any tables and views you want to ingest * [Ownership documentation](https://docs.databricks.com/data-governance/unity-catalog/manage-privileges/ownership.html) * [Privileges documentation](https://docs.databricks.com/data-governance/unity-catalog/manage-privileges/privileges.html) + + To ingest your workspace's notebooks and respective lineage, your service principal must have `CAN_READ` privileges on the folders containing the notebooks you want to ingest: [guide](https://docs.databricks.com/en/security/auth-authz/access-control/workspace-acl.html#folder-permissions). + To `include_usage_statistics` (enabled by default), your service principal must have `CAN_MANAGE` permissions on any SQL Warehouses you want to ingest: [guide](https://docs.databricks.com/security/auth-authz/access-control/sql-endpoint-acl.html). + To ingest `profiling` information with `call_analyze` (enabled by default), your service principal must have ownership or `MODIFY` privilege on any tables you want to profile. * Alternatively, you can run [ANALYZE TABLE](https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-analyze-table.html) yourself on any tables you want to profile, then set `call_analyze` to `false`. 
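As a rough illustration of the notebook opt-in (the `include_notebooks` field is introduced in the config change below; the token and workspace URL here are placeholders):

```python
from datahub.ingestion.source.unity.config import UnityCatalogSourceConfig

config = UnityCatalogSourceConfig(
    token="<personal-access-token>",  # placeholder
    workspace_url="https://placeholder.cloud.databricks.com",  # placeholder
    # Off by default; when enabled, notebooks are ingested as DataHub datasets and
    # the table lineage calls also request notebook (entity) lineage.
    include_notebooks=True,
)
assert config.include_notebooks
```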
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 34afa8cdb39a4..fe8e3be4632c4 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -250,7 +250,7 @@ databricks = { # 0.1.11 appears to have authentication issues with azure databricks - "databricks-sdk>=0.1.1, != 0.1.11", + "databricks-sdk>=0.9.0", "pyspark", "requests", } diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 844a29f1c78a3..7419577b367aa 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -9,6 +9,7 @@ make_container_urn, make_data_platform_urn, make_dataplatform_instance_urn, + make_dataset_urn_with_platform_instance, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit @@ -125,6 +126,17 @@ class BucketKey(ContainerKey): bucket_name: str +class NotebookKey(DatahubKey): + notebook_id: int + platform: str + instance: Optional[str] + + def as_urn(self) -> str: + return make_dataset_urn_with_platform_instance( + platform=self.platform, platform_instance=self.instance, name=self.guid() + ) + + class DatahubKeyJSONEncoder(json.JSONEncoder): # overload method default def default(self, obj: Any) -> Any: diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index a2d89d26112f4..741b4789bef21 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -16,6 +16,9 @@ class DatasetSubTypes(str, Enum): SALESFORCE_STANDARD_OBJECT = "Object" POWERBI_DATASET_TABLE = "PowerBI Dataset Table" + # TODO: Create separate entity... + NOTEBOOK = "Notebook" + class DatasetContainerSubTypes(str, Enum): # Generic SubTypes diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index 94ff755e3b254..a49c789a82f27 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -127,11 +127,16 @@ class UnityCatalogSourceConfig( description='Attach domains to catalogs, schemas or tables during ingestion using regex patterns. Domain key can be a guid like *urn:li:domain:ec428203-ce86-4db3-985d-5a8ee6df32ba* or a string like "Marketing".) If you provide strings, then datahub will attempt to resolve this name to a guid, and will error out if this fails. There can be multiple domain keys specified.', ) - include_table_lineage: Optional[bool] = pydantic.Field( + include_table_lineage: bool = pydantic.Field( default=True, description="Option to enable/disable lineage generation.", ) + include_notebooks: bool = pydantic.Field( + default=False, + description="Ingest notebooks, represented as DataHub datasets.", + ) + include_ownership: bool = pydantic.Field( default=False, description="Option to enable/disable ownership generation for metastores, catalogs, schemas, and tables.", @@ -141,7 +146,7 @@ class UnityCatalogSourceConfig( "include_table_ownership", "include_ownership" ) - include_column_lineage: Optional[bool] = pydantic.Field( + include_column_lineage: bool = pydantic.Field( default=True, description="Option to enable/disable lineage generation. 
Currently we have to call a rest call per column to get column level lineage due to the Databrick api which can slow down ingestion. ", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index e92f4ff07b1ad..2401f1c3d163c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -23,6 +23,7 @@ QueryStatementType, QueryStatus, ) +from databricks.sdk.service.workspace import ObjectType import datahub from datahub.ingestion.source.unity.proxy_profiling import ( @@ -33,6 +34,7 @@ Catalog, Column, Metastore, + Notebook, Query, Schema, ServicePrincipal, @@ -137,6 +139,21 @@ def service_principals(self) -> Iterable[ServicePrincipal]: for principal in self._workspace_client.service_principals.list(): yield self._create_service_principal(principal) + def workspace_notebooks(self) -> Iterable[Notebook]: + for obj in self._workspace_client.workspace.list("/", recursive=True): + if obj.object_type == ObjectType.NOTEBOOK: + yield Notebook( + id=obj.object_id, + path=obj.path, + language=obj.language, + created_at=datetime.fromtimestamp( + obj.created_at / 1000, tz=timezone.utc + ), + modified_at=datetime.fromtimestamp( + obj.modified_at / 1000, tz=timezone.utc + ), + ) + def query_history( self, start_time: datetime, @@ -153,7 +170,7 @@ def query_history( "start_time_ms": start_time.timestamp() * 1000, "end_time_ms": end_time.timestamp() * 1000, }, - "statuses": [QueryStatus.FINISHED.value], + "statuses": [QueryStatus.FINISHED], "statement_types": [typ.value for typ in ALLOWED_STATEMENT_TYPES], } ) @@ -196,61 +213,75 @@ def _query_history( method, path, body={**body, "page_token": response["next_page_token"]} ) - def list_lineages_by_table(self, table_name: str) -> dict: + def list_lineages_by_table( + self, table_name: str, include_entity_lineage: bool + ) -> dict: """List table lineage by table name.""" return self._workspace_client.api_client.do( method="GET", - path="/api/2.0/lineage-tracking/table-lineage/get", - body={"table_name": table_name}, + path="/api/2.0/lineage-tracking/table-lineage", + body={ + "table_name": table_name, + "include_entity_lineage": include_entity_lineage, + }, ) def list_lineages_by_column(self, table_name: str, column_name: str) -> dict: """List column lineage by table name and column name.""" return self._workspace_client.api_client.do( "GET", - "/api/2.0/lineage-tracking/column-lineage/get", + "/api/2.0/lineage-tracking/column-lineage", body={"table_name": table_name, "column_name": column_name}, ) - def table_lineage(self, table: Table) -> None: + def table_lineage( + self, table: Table, include_entity_lineage: bool + ) -> Optional[dict]: # Lineage endpoint doesn't exists on 2.1 version try: response: dict = self.list_lineages_by_table( - table_name=f"{table.schema.catalog.name}.{table.schema.name}.{table.name}" + table_name=table.ref.qualified_table_name, + include_entity_lineage=include_entity_lineage, ) - table.upstreams = { - TableReference( - table.schema.catalog.metastore.id, - item["catalog_name"], - item["schema_name"], - item["name"], - ): {} - for item in response.get("upstream_tables", []) - } + + for item in response.get("upstreams") or []: + if "tableInfo" in item: + table_ref = TableReference.create_from_lineage( + item["tableInfo"], table.schema.catalog.metastore.id + ) + if table_ref: + table.upstreams[table_ref] = {} + for notebook in item.get("notebookInfos") or []: + 
table.upstream_notebooks.add(notebook["notebook_id"]) + + for item in response.get("downstreams") or []: + for notebook in item.get("notebookInfos") or []: + table.downstream_notebooks.add(notebook["notebook_id"]) + + return response except Exception as e: logger.error(f"Error getting lineage: {e}") + return None - def get_column_lineage(self, table: Table) -> None: + def get_column_lineage(self, table: Table, include_entity_lineage: bool) -> None: try: - table_lineage_response: dict = self.list_lineages_by_table( - table_name=f"{table.schema.catalog.name}.{table.schema.name}.{table.name}" + table_lineage = self.table_lineage( + table, include_entity_lineage=include_entity_lineage ) - if table_lineage_response: + if table_lineage: for column in table.columns: response: dict = self.list_lineages_by_column( - table_name=f"{table.schema.catalog.name}.{table.schema.name}.{table.name}", + table_name=table.ref.qualified_table_name, column_name=column.name, ) for item in response.get("upstream_cols", []): - table_ref = TableReference( - table.schema.catalog.metastore.id, - item["catalog_name"], - item["schema_name"], - item["table_name"], + table_ref = TableReference.create_from_lineage( + item, table.schema.catalog.metastore.id ) - table.upstreams.setdefault(table_ref, {}).setdefault( - column.name, [] - ).append(item["name"]) + if table_ref: + table.upstreams.setdefault(table_ref, {}).setdefault( + column.name, [] + ).append(item["name"]) except Exception as e: logger.error(f"Error getting lineage: {e}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py index 2b943d8c98e7d..d57f20245913f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py @@ -1,8 +1,10 @@ # Supported types are available at # https://api-docs.databricks.com/rest/latest/unity-catalog-api-specification-2-1.html?_ga=2.151019001.1795147704.1666247755-2119235717.1666247755 +import dataclasses +import logging from dataclasses import dataclass, field from datetime import datetime -from typing import Dict, List, Optional +from typing import Dict, FrozenSet, List, Optional, Set from databricks.sdk.service.catalog import ( CatalogType, @@ -11,6 +13,7 @@ TableType, ) from databricks.sdk.service.sql import QueryStatementType +from databricks.sdk.service.workspace import Language from datahub.metadata.schema_classes import ( ArrayTypeClass, @@ -26,6 +29,8 @@ TimeTypeClass, ) +logger = logging.getLogger(__name__) + DATA_TYPE_REGISTRY: dict = { ColumnTypeName.BOOLEAN: BooleanTypeClass, ColumnTypeName.BYTE: BytesTypeClass, @@ -66,6 +71,9 @@ ALLOWED_STATEMENT_TYPES = {*OPERATION_STATEMENT_TYPES.keys(), QueryStatementType.SELECT} +NotebookId = int + + @dataclass class CommonProperty: id: str @@ -136,6 +144,19 @@ def create(cls, table: "Table") -> "TableReference": table.name, ) + @classmethod + def create_from_lineage(cls, d: dict, metastore: str) -> Optional["TableReference"]: + try: + return cls( + metastore, + d["catalog_name"], + d["schema_name"], + d.get("table_name", d["name"]), # column vs table query output + ) + except Exception as e: + logger.warning(f"Failed to create TableReference from {d}: {e}") + return None + def __str__(self) -> str: return f"{self.metastore}.{self.catalog}.{self.schema}.{self.table}" @@ -166,6 +187,8 @@ class Table(CommonProperty): view_definition: Optional[str] properties: Dict[str, str] upstreams: 
Dict[TableReference, Dict[str, List[str]]] = field(default_factory=dict) + upstream_notebooks: Set[NotebookId] = field(default_factory=set) + downstream_notebooks: Set[NotebookId] = field(default_factory=set) ref: TableReference = field(init=False) @@ -228,3 +251,23 @@ def __bool__(self): self.max is not None, ) ) + + +@dataclass +class Notebook: + id: NotebookId + path: str + language: Language + created_at: datetime + modified_at: datetime + + upstreams: FrozenSet[TableReference] = field(default_factory=frozenset) + + @classmethod + def add_upstream(cls, upstream: TableReference, notebook: "Notebook") -> "Notebook": + return cls( + **{ # type: ignore + **dataclasses.asdict(notebook), + "upstreams": frozenset([*notebook.upstreams, upstream]), + } + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py index 8382b31a56add..808172a136bb3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py @@ -5,21 +5,23 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalSourceReport, ) +from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.utilities.lossy_collections import LossyDict, LossyList @dataclass -class UnityCatalogReport(StaleEntityRemovalSourceReport): +class UnityCatalogReport(IngestionStageReport, StaleEntityRemovalSourceReport): metastores: EntityFilterReport = EntityFilterReport.field(type="metastore") catalogs: EntityFilterReport = EntityFilterReport.field(type="catalog") schemas: EntityFilterReport = EntityFilterReport.field(type="schema") tables: EntityFilterReport = EntityFilterReport.field(type="table/view") table_profiles: EntityFilterReport = EntityFilterReport.field(type="table profile") + notebooks: EntityFilterReport = EntityFilterReport.field(type="notebook") num_queries: int = 0 num_queries_dropped_parse_failure: int = 0 - num_queries_dropped_missing_table: int = 0 # Can be due to pattern filter - num_queries_dropped_duplicate_table: int = 0 + num_queries_missing_table: int = 0 # Can be due to pattern filter + num_queries_duplicate_table: int = 0 num_queries_parsed_by_spark_plan: int = 0 # Distinguish from Operations emitted for created / updated timestamps diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 493acb939c3bb..f2da1aece9fd4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -2,7 +2,7 @@ import re import time from datetime import timedelta -from typing import Dict, Iterable, List, Optional, Set +from typing import Dict, Iterable, List, Optional, Set, Union from urllib.parse import urljoin from datahub.emitter.mce_builder import ( @@ -18,6 +18,7 @@ CatalogKey, ContainerKey, MetastoreKey, + NotebookKey, UnitySchemaKey, add_dataset_to_container, gen_containers, @@ -56,6 +57,8 @@ Catalog, Column, Metastore, + Notebook, + NotebookId, Schema, ServicePrincipal, Table, @@ -69,6 +72,7 @@ ViewProperties, ) from datahub.metadata.schema_classes import ( + BrowsePathsClass, DataPlatformInstanceClass, DatasetLineageTypeClass, DatasetPropertiesClass, @@ -88,6 +92,7 @@ UpstreamClass, UpstreamLineageClass, ) +from datahub.utilities.file_backed_collections import FileBackedDict from datahub.utilities.hive_schema_to_avro import 
get_schema_fields_for_hive_column from datahub.utilities.registries.domain_registry import DomainRegistry @@ -157,6 +162,7 @@ def __init__(self, ctx: PipelineContext, config: UnityCatalogSourceConfig): # Global set of table refs self.table_refs: Set[TableReference] = set() self.view_refs: Set[TableReference] = set() + self.notebooks: FileBackedDict[Notebook] = FileBackedDict() @staticmethod def test_connection(config_dict: dict) -> TestConnectionReport: @@ -176,6 +182,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: ] def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + self.report.report_ingestion_stage_start("Start warehouse") wait_on_warehouse = None if self.config.is_profiling_enabled(): # Can take several minutes, so start now and wait later @@ -187,10 +194,23 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) return + self.report.report_ingestion_stage_start("Ingest service principals") self.build_service_principal_map() + if self.config.include_notebooks: + self.report.report_ingestion_stage_start("Ingest notebooks") + yield from self.process_notebooks() + yield from self.process_metastores() + if self.config.include_notebooks: + self.report.report_ingestion_stage_start("Notebook lineage") + for notebook in self.notebooks.values(): + wu = self._gen_notebook_lineage(notebook) + if wu: + yield wu + if self.config.include_usage_statistics: + self.report.report_ingestion_stage_start("Ingest usage") usage_extractor = UnityCatalogUsageExtractor( config=self.config, report=self.report, @@ -203,6 +223,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) if self.config.is_profiling_enabled(): + self.report.report_ingestion_stage_start("Wait on warehouse") assert wait_on_warehouse timeout = timedelta(seconds=self.config.profiling.max_wait_secs) wait_on_warehouse.result(timeout) @@ -212,6 +233,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.unity_catalog_api_proxy, self.gen_dataset_urn, ) + self.report.report_ingestion_stage_start("Profiling") yield from profiling_extractor.get_workunits(self.table_refs) def build_service_principal_map(self) -> None: @@ -223,6 +245,56 @@ def build_service_principal_map(self) -> None: "service-principals", f"Unable to fetch service principals: {e}" ) + def process_notebooks(self) -> Iterable[MetadataWorkUnit]: + for notebook in self.unity_catalog_api_proxy.workspace_notebooks(): + self.notebooks[str(notebook.id)] = notebook + yield from self._gen_notebook_aspects(notebook) + + def _gen_notebook_aspects(self, notebook: Notebook) -> Iterable[MetadataWorkUnit]: + mcps = MetadataChangeProposalWrapper.construct_many( + entityUrn=self.gen_notebook_urn(notebook), + aspects=[ + DatasetPropertiesClass( + name=notebook.path.rsplit("/", 1)[-1], + customProperties={ + "path": notebook.path, + "language": notebook.language.value, + }, + externalUrl=urljoin( + self.config.workspace_url, f"#notebook/{notebook.id}" + ), + created=TimeStampClass(int(notebook.created_at.timestamp() * 1000)), + lastModified=TimeStampClass( + int(notebook.modified_at.timestamp() * 1000) + ), + ), + SubTypesClass(typeNames=[DatasetSubTypes.NOTEBOOK]), + BrowsePathsClass(paths=notebook.path.split("/")), + # TODO: Add DPI aspect + ], + ) + for mcp in mcps: + yield mcp.as_workunit() + + self.report.notebooks.processed(notebook.path) + + def _gen_notebook_lineage(self, notebook: Notebook) -> Optional[MetadataWorkUnit]: + if not notebook.upstreams: + return None + + return 
MetadataChangeProposalWrapper( + entityUrn=self.gen_notebook_urn(notebook), + aspect=UpstreamLineageClass( + upstreams=[ + UpstreamClass( + dataset=self.gen_dataset_urn(upstream_ref), + type=DatasetLineageTypeClass.COPY, + ) + for upstream_ref in notebook.upstreams + ] + ), + ).as_workunit() + def process_metastores(self) -> Iterable[MetadataWorkUnit]: metastore = self.unity_catalog_api_proxy.assigned_metastore() yield from self.gen_metastore_containers(metastore) @@ -247,6 +319,7 @@ def process_schemas(self, catalog: Catalog) -> Iterable[MetadataWorkUnit]: self.report.schemas.dropped(schema.id) continue + self.report.report_ingestion_stage_start(f"Ingest schema {schema.id}") yield from self.gen_schema_containers(schema) yield from self.process_tables(schema) @@ -282,13 +355,21 @@ def process_table(self, table: Table, schema: Schema) -> Iterable[MetadataWorkUn ownership = self._create_table_ownership_aspect(table) data_platform_instance = self._create_data_platform_instance_aspect(table) - lineage: Optional[UpstreamLineageClass] = None if self.config.include_column_lineage: - self.unity_catalog_api_proxy.get_column_lineage(table) - lineage = self._generate_column_lineage_aspect(dataset_urn, table) + self.unity_catalog_api_proxy.get_column_lineage( + table, include_entity_lineage=self.config.include_notebooks + ) elif self.config.include_table_lineage: - self.unity_catalog_api_proxy.table_lineage(table) - lineage = self._generate_lineage_aspect(dataset_urn, table) + self.unity_catalog_api_proxy.table_lineage( + table, include_entity_lineage=self.config.include_notebooks + ) + lineage = self._generate_lineage_aspect(dataset_urn, table) + + if self.config.include_notebooks: + for notebook_id in table.downstream_notebooks: + self.notebooks[str(notebook_id)] = Notebook.add_upstream( + table.ref, self.notebooks[str(notebook_id)] + ) yield from [ mcp.as_workunit() @@ -308,7 +389,7 @@ def process_table(self, table: Table, schema: Schema) -> Iterable[MetadataWorkUn ) ] - def _generate_column_lineage_aspect( + def _generate_lineage_aspect( self, dataset_urn: str, table: Table ) -> Optional[UpstreamLineageClass]: upstreams: List[UpstreamClass] = [] @@ -318,6 +399,7 @@ def _generate_column_lineage_aspect( ): upstream_urn = self.gen_dataset_urn(upstream_ref) + # Should be empty if config.include_column_lineage is False finegrained_lineages.extend( FineGrainedLineage( upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, @@ -331,38 +413,28 @@ def _generate_column_lineage_aspect( for d_col, u_cols in sorted(downstream_to_upstream_cols.items()) ) - upstream_table = UpstreamClass( - upstream_urn, - DatasetLineageTypeClass.TRANSFORMED, - ) - upstreams.append(upstream_table) - - if upstreams: - return UpstreamLineageClass( - upstreams=upstreams, fineGrainedLineages=finegrained_lineages - ) - else: - return None - - def _generate_lineage_aspect( - self, dataset_urn: str, table: Table - ) -> Optional[UpstreamLineageClass]: - upstreams: List[UpstreamClass] = [] - for upstream in sorted(table.upstreams.keys()): - upstream_urn = make_dataset_urn_with_platform_instance( - self.platform, - f"{table.schema.catalog.metastore.id}.{upstream}", - self.platform_instance_name, + upstreams.append( + UpstreamClass( + dataset=upstream_urn, + type=DatasetLineageTypeClass.TRANSFORMED, + ) ) - upstream_table = UpstreamClass( - upstream_urn, - DatasetLineageTypeClass.TRANSFORMED, + for notebook in table.upstream_notebooks: + upstreams.append( + UpstreamClass( + dataset=self.gen_notebook_urn(notebook), + 
type=DatasetLineageTypeClass.TRANSFORMED, + ) ) - upstreams.append(upstream_table) if upstreams: - return UpstreamLineageClass(upstreams=upstreams) + return UpstreamLineageClass( + upstreams=upstreams, + fineGrainedLineages=finegrained_lineages + if self.config.include_column_lineage + else None, + ) else: return None @@ -389,6 +461,14 @@ def gen_dataset_urn(self, table_ref: TableReference) -> str: name=str(table_ref), ) + def gen_notebook_urn(self, notebook: Union[Notebook, NotebookId]) -> str: + notebook_id = notebook.id if isinstance(notebook, Notebook) else notebook + return NotebookKey( + notebook_id=notebook_id, + platform=self.platform, + instance=self.config.platform_instance, + ).as_urn() + def gen_schema_containers(self, schema: Schema) -> Iterable[MetadataWorkUnit]: domain_urn = self._gen_domain_urn(f"{schema.catalog.name}.{schema.name}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py index 49f56b46fb012..ab21c1a318659 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py @@ -214,12 +214,15 @@ def _resolve_tables( self, tables: List[str], table_map: TableMap ) -> List[TableReference]: """Resolve tables to TableReferences, filtering out unrecognized or unresolvable table names.""" + + missing_table = False + duplicate_table = False output = [] for table in tables: table = str(table) if table not in table_map: logger.debug(f"Dropping query with unrecognized table: {table}") - self.report.num_queries_dropped_missing_table += 1 + missing_table = True else: refs = table_map[table] if len(refs) == 1: @@ -228,6 +231,11 @@ def _resolve_tables( logger.warning( f"Could not resolve table ref for {table}: {len(refs)} duplicates." 
) - self.report.num_queries_dropped_duplicate_table += 1 + duplicate_table = True + + if missing_table: + self.report.num_queries_missing_table += 1 + if duplicate_table: + self.report.num_queries_duplicate_table += 1 return output From 301d3e6b1ccffaf946f128766578faddbc7ac44e Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 4 Oct 2023 10:23:13 -0400 Subject: [PATCH 03/98] test(ingest/unity): Add Unity Catalog memory performance testing (#8932) --- .../ingestion/source/unity/proxy_types.py | 1 - .../tests/performance/bigquery/__init__.py | 0 .../bigquery_events.py} | 0 .../{ => bigquery}/test_bigquery_usage.py | 22 +-- .../tests/performance/data_generation.py | 53 ++++- .../tests/performance/data_model.py | 31 ++- .../tests/performance/databricks/__init__.py | 0 .../performance/databricks/test_unity.py | 71 +++++++ .../databricks/unity_proxy_mock.py | 183 ++++++++++++++++++ .../tests/performance/helpers.py | 21 ++ .../tests/unit/test_bigquery_usage.py | 7 +- 11 files changed, 356 insertions(+), 33 deletions(-) create mode 100644 metadata-ingestion/tests/performance/bigquery/__init__.py rename metadata-ingestion/tests/performance/{bigquery.py => bigquery/bigquery_events.py} (100%) rename metadata-ingestion/tests/performance/{ => bigquery}/test_bigquery_usage.py (80%) create mode 100644 metadata-ingestion/tests/performance/databricks/__init__.py create mode 100644 metadata-ingestion/tests/performance/databricks/test_unity.py create mode 100644 metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py create mode 100644 metadata-ingestion/tests/performance/helpers.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py index d57f20245913f..54ac2e90d7c7e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py @@ -175,7 +175,6 @@ class Table(CommonProperty): columns: List[Column] storage_location: Optional[str] data_source_format: Optional[DataSourceFormat] - comment: Optional[str] table_type: TableType owner: Optional[str] generation: Optional[int] diff --git a/metadata-ingestion/tests/performance/bigquery/__init__.py b/metadata-ingestion/tests/performance/bigquery/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/tests/performance/bigquery.py b/metadata-ingestion/tests/performance/bigquery/bigquery_events.py similarity index 100% rename from metadata-ingestion/tests/performance/bigquery.py rename to metadata-ingestion/tests/performance/bigquery/bigquery_events.py diff --git a/metadata-ingestion/tests/performance/test_bigquery_usage.py b/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py similarity index 80% rename from metadata-ingestion/tests/performance/test_bigquery_usage.py rename to metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py index 7e05ef070b45d..bbc3378450bff 100644 --- a/metadata-ingestion/tests/performance/test_bigquery_usage.py +++ b/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py @@ -2,13 +2,11 @@ import os import random from datetime import timedelta -from typing import Iterable, Tuple import humanfriendly import psutil from datahub.emitter.mce_builder import make_dataset_urn -from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.bigquery_v2.bigquery_config import ( BigQueryUsageConfig, BigQueryV2Config, @@ -16,12 
+14,13 @@ from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report from datahub.ingestion.source.bigquery_v2.usage import BigQueryUsageExtractor from datahub.utilities.perf_timer import PerfTimer -from tests.performance.bigquery import generate_events, ref_from_table +from tests.performance.bigquery.bigquery_events import generate_events, ref_from_table from tests.performance.data_generation import ( NormalDistribution, generate_data, generate_queries, ) +from tests.performance.helpers import workunit_sink def run_test(): @@ -33,7 +32,7 @@ def run_test(): num_views=2000, time_range=timedelta(days=7), ) - all_tables = seed_metadata.tables + seed_metadata.views + all_tables = seed_metadata.all_tables config = BigQueryV2Config( start_time=seed_metadata.start_time, @@ -88,21 +87,6 @@ def run_test(): print(f"Hash collisions: {report.num_usage_query_hash_collisions}") -def workunit_sink(workunits: Iterable[MetadataWorkUnit]) -> Tuple[int, int]: - peak_memory_usage = psutil.Process(os.getpid()).memory_info().rss - i: int = 0 - for i, wu in enumerate(workunits): - if i % 10_000 == 0: - peak_memory_usage = max( - peak_memory_usage, psutil.Process(os.getpid()).memory_info().rss - ) - peak_memory_usage = max( - peak_memory_usage, psutil.Process(os.getpid()).memory_info().rss - ) - - return i, peak_memory_usage - - if __name__ == "__main__": root_logger = logging.getLogger() root_logger.setLevel(logging.INFO) diff --git a/metadata-ingestion/tests/performance/data_generation.py b/metadata-ingestion/tests/performance/data_generation.py index c530848f27f5c..67b156896909a 100644 --- a/metadata-ingestion/tests/performance/data_generation.py +++ b/metadata-ingestion/tests/performance/data_generation.py @@ -11,11 +11,14 @@ import uuid from dataclasses import dataclass from datetime import datetime, timedelta, timezone -from typing import Iterable, List, TypeVar +from typing import Iterable, List, TypeVar, Union, cast from faker import Faker from tests.performance.data_model import ( + Column, + ColumnMapping, + ColumnType, Container, FieldAccess, Query, @@ -52,15 +55,21 @@ def sample_with_floor(self, floor: int = 1) -> int: @dataclass class SeedMetadata: - containers: List[Container] + # Each list is a layer of containers, e.g. 
[[databases], [schemas]] + containers: List[List[Container]] + tables: List[Table] views: List[View] start_time: datetime end_time: datetime + @property + def all_tables(self) -> List[Table]: + return self.tables + cast(List[Table], self.views) + def generate_data( - num_containers: int, + num_containers: Union[List[int], int], num_tables: int, num_views: int, columns_per_table: NormalDistribution = NormalDistribution(5, 2), @@ -68,32 +77,52 @@ def generate_data( view_definition_length: NormalDistribution = NormalDistribution(150, 50), time_range: timedelta = timedelta(days=14), ) -> SeedMetadata: - containers = [Container(f"container-{i}") for i in range(num_containers)] + # Assemble containers + if isinstance(num_containers, int): + num_containers = [num_containers] + + containers: List[List[Container]] = [] + for i, num_in_layer in enumerate(num_containers): + layer = [ + Container( + f"{i}-container-{j}", + parent=random.choice(containers[-1]) if containers else None, + ) + for j in range(num_in_layer) + ] + containers.append(layer) + + # Assemble tables tables = [ Table( f"table-{i}", - container=random.choice(containers), + container=random.choice(containers[-1]), columns=[ f"column-{j}-{uuid.uuid4()}" for j in range(columns_per_table.sample_with_floor()) ], + column_mapping=None, ) for i in range(num_tables) ] views = [ View( f"view-{i}", - container=random.choice(containers), + container=random.choice(containers[-1]), columns=[ f"column-{j}-{uuid.uuid4()}" for j in range(columns_per_table.sample_with_floor()) ], + column_mapping=None, definition=f"{uuid.uuid4()}-{'*' * view_definition_length.sample_with_floor(10)}", parents=random.sample(tables, parents_per_view.sample_with_floor()), ) for i in range(num_views) ] + for table in tables + views: + _generate_column_mapping(table) + now = datetime.now(tz=timezone.utc) return SeedMetadata( containers=containers, @@ -162,6 +191,18 @@ def generate_queries( ) +def _generate_column_mapping(table: Table) -> ColumnMapping: + d = {} + for column in table.columns: + d[column] = Column( + name=column, + type=random.choice(list(ColumnType)), + nullable=random.random() < 0.1, # Fixed 10% chance for now + ) + table.column_mapping = d + return d + + def _sample_list(lst: List[T], dist: NormalDistribution, floor: int = 1) -> List[T]: return random.sample(lst, min(dist.sample_with_floor(floor), len(lst))) diff --git a/metadata-ingestion/tests/performance/data_model.py b/metadata-ingestion/tests/performance/data_model.py index c593e69ceb9a7..9425fa827070e 100644 --- a/metadata-ingestion/tests/performance/data_model.py +++ b/metadata-ingestion/tests/performance/data_model.py @@ -1,10 +1,10 @@ from dataclasses import dataclass from datetime import datetime -from typing import List, Optional +from enum import Enum +from typing import Dict, List, Optional from typing_extensions import Literal -Column = str StatementType = Literal[ # SELECT + values from OperationTypeClass "SELECT", "INSERT", @@ -21,13 +21,36 @@ @dataclass class Container: name: str + parent: Optional["Container"] = None + + +class ColumnType(str, Enum): + # Can add types that take parameters in the future + + INTEGER = "INTEGER" + FLOAT = "FLOAT" # Double precision (64 bit) + STRING = "STRING" + BOOLEAN = "BOOLEAN" + DATETIME = "DATETIME" + + +@dataclass +class Column: + name: str + type: ColumnType + nullable: bool + + +ColumnRef = str +ColumnMapping = Dict[ColumnRef, Column] @dataclass class Table: name: str container: Container - columns: List[Column] + columns: List[ColumnRef] + 
column_mapping: Optional[ColumnMapping] def is_view(self) -> bool: return False @@ -44,7 +67,7 @@ def is_view(self) -> bool: @dataclass class FieldAccess: - column: Column + column: ColumnRef table: Table diff --git a/metadata-ingestion/tests/performance/databricks/__init__.py b/metadata-ingestion/tests/performance/databricks/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/tests/performance/databricks/test_unity.py b/metadata-ingestion/tests/performance/databricks/test_unity.py new file mode 100644 index 0000000000000..cc9558f0692ed --- /dev/null +++ b/metadata-ingestion/tests/performance/databricks/test_unity.py @@ -0,0 +1,71 @@ +import logging +import os +from unittest.mock import patch + +import humanfriendly +import psutil + +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.source.unity.config import UnityCatalogSourceConfig +from datahub.ingestion.source.unity.source import UnityCatalogSource +from datahub.utilities.perf_timer import PerfTimer +from tests.performance.data_generation import ( + NormalDistribution, + generate_data, + generate_queries, +) +from tests.performance.databricks.unity_proxy_mock import UnityCatalogApiProxyMock +from tests.performance.helpers import workunit_sink + + +def run_test(): + seed_metadata = generate_data( + num_containers=[1, 100, 5000], + num_tables=50000, + num_views=10000, + columns_per_table=NormalDistribution(100, 50), + parents_per_view=NormalDistribution(5, 5), + view_definition_length=NormalDistribution(1000, 300), + ) + queries = generate_queries( + seed_metadata, + num_selects=100000, + num_operations=100000, + num_unique_queries=10000, + num_users=1000, + ) + proxy_mock = UnityCatalogApiProxyMock( + seed_metadata, queries=queries, num_service_principals=10000 + ) + print("Data generated") + + config = UnityCatalogSourceConfig( + token="", workspace_url="http://localhost:1234", include_usage_statistics=False + ) + ctx = PipelineContext(run_id="test") + with patch( + "datahub.ingestion.source.unity.source.UnityCatalogApiProxy", + lambda *args, **kwargs: proxy_mock, + ): + source: UnityCatalogSource = UnityCatalogSource(ctx, config) + + pre_mem_usage = psutil.Process(os.getpid()).memory_info().rss + print(f"Test data size: {humanfriendly.format_size(pre_mem_usage)}") + + with PerfTimer() as timer: + workunits = source.get_workunits() + num_workunits, peak_memory_usage = workunit_sink(workunits) + print(f"Workunits Generated: {num_workunits}") + print(f"Seconds Elapsed: {timer.elapsed_seconds():.2f} seconds") + + print( + f"Peak Memory Used: {humanfriendly.format_size(peak_memory_usage - pre_mem_usage)}" + ) + print(source.report.aspects) + + +if __name__ == "__main__": + root_logger = logging.getLogger() + root_logger.setLevel(logging.INFO) + root_logger.addHandler(logging.StreamHandler()) + run_test() diff --git a/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py b/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py new file mode 100644 index 0000000000000..593163e12bf0a --- /dev/null +++ b/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py @@ -0,0 +1,183 @@ +import uuid +from collections import defaultdict +from datetime import datetime, timezone +from typing import Dict, Iterable, List + +from databricks.sdk.service.catalog import ColumnTypeName +from databricks.sdk.service.sql import QueryStatementType + +from datahub.ingestion.source.unity.proxy_types import ( + Catalog, + CatalogType, + Column, + 
Metastore, + Query, + Schema, + ServicePrincipal, + Table, + TableType, +) +from tests.performance import data_model +from tests.performance.data_generation import SeedMetadata +from tests.performance.data_model import ColumnType, StatementType + + +class UnityCatalogApiProxyMock: + """Mimics UnityCatalogApiProxy for performance testing.""" + + def __init__( + self, + seed_metadata: SeedMetadata, + queries: Iterable[data_model.Query] = (), + num_service_principals: int = 0, + ) -> None: + self.seed_metadata = seed_metadata + self.queries = queries + self.num_service_principals = num_service_principals + self.warehouse_id = "invalid-warehouse-id" + + # Cache for performance + self._schema_to_table: Dict[str, List[data_model.Table]] = defaultdict(list) + for table in seed_metadata.all_tables: + self._schema_to_table[table.container.name].append(table) + + def check_basic_connectivity(self) -> bool: + return True + + def assigned_metastore(self) -> Metastore: + container = self.seed_metadata.containers[0][0] + return Metastore( + id=container.name, + name=container.name, + global_metastore_id=container.name, + metastore_id=container.name, + comment=None, + owner=None, + cloud=None, + region=None, + ) + + def catalogs(self, metastore: Metastore) -> Iterable[Catalog]: + for container in self.seed_metadata.containers[1]: + if not container.parent or metastore.name != container.parent.name: + continue + + yield Catalog( + id=f"{metastore.id}.{container.name}", + name=container.name, + metastore=metastore, + comment=None, + owner=None, + type=CatalogType.MANAGED_CATALOG, + ) + + def schemas(self, catalog: Catalog) -> Iterable[Schema]: + for container in self.seed_metadata.containers[2]: + # Assumes all catalog names are unique + if not container.parent or catalog.name != container.parent.name: + continue + + yield Schema( + id=f"{catalog.id}.{container.name}", + name=container.name, + catalog=catalog, + comment=None, + owner=None, + ) + + def tables(self, schema: Schema) -> Iterable[Table]: + for table in self._schema_to_table[schema.name]: + columns = [] + if table.column_mapping: + for i, col_name in enumerate(table.columns): + column = table.column_mapping[col_name] + columns.append( + Column( + id=column.name, + name=column.name, + type_name=self._convert_column_type(column.type), + type_text=column.type.value, + nullable=column.nullable, + position=i, + comment=None, + type_precision=0, + type_scale=0, + ) + ) + + yield Table( + id=f"{schema.id}.{table.name}", + name=table.name, + schema=schema, + table_type=TableType.VIEW if table.is_view() else TableType.MANAGED, + columns=columns, + created_at=datetime.now(tz=timezone.utc), + comment=None, + owner=None, + storage_location=None, + data_source_format=None, + generation=None, + created_by="", + updated_at=None, + updated_by=None, + table_id="", + view_definition=table.definition + if isinstance(table, data_model.View) + else None, + properties={}, + ) + + def service_principals(self) -> Iterable[ServicePrincipal]: + for i in range(self.num_service_principals): + yield ServicePrincipal( + id=str(i), + application_id=str(uuid.uuid4()), + display_name=f"user-{i}", + active=True, + ) + + def query_history( + self, + start_time: datetime, + end_time: datetime, + ) -> Iterable[Query]: + for i, query in enumerate(self.queries): + yield Query( + query_id=str(i), + query_text=query.text, + statement_type=self._convert_statement_type(query.type), + start_time=query.timestamp, + end_time=query.timestamp, + user_id=hash(query.actor), + 
user_name=query.actor, + executed_as_user_id=hash(query.actor), + executed_as_user_name=None, + ) + + def table_lineage(self, table: Table) -> None: + pass + + def get_column_lineage(self, table: Table) -> None: + pass + + @staticmethod + def _convert_column_type(t: ColumnType) -> ColumnTypeName: + if t == ColumnType.INTEGER: + return ColumnTypeName.INT + elif t == ColumnType.FLOAT: + return ColumnTypeName.DOUBLE + elif t == ColumnType.STRING: + return ColumnTypeName.STRING + elif t == ColumnType.BOOLEAN: + return ColumnTypeName.BOOLEAN + elif t == ColumnType.DATETIME: + return ColumnTypeName.TIMESTAMP + else: + raise ValueError(f"Unknown column type: {t}") + + @staticmethod + def _convert_statement_type(t: StatementType) -> QueryStatementType: + if t == "CUSTOM" or t == "UNKNOWN": + return QueryStatementType.OTHER + else: + return QueryStatementType[t] diff --git a/metadata-ingestion/tests/performance/helpers.py b/metadata-ingestion/tests/performance/helpers.py new file mode 100644 index 0000000000000..eb98e53670c96 --- /dev/null +++ b/metadata-ingestion/tests/performance/helpers.py @@ -0,0 +1,21 @@ +import os +from typing import Iterable, Tuple + +import psutil + +from datahub.ingestion.api.workunit import MetadataWorkUnit + + +def workunit_sink(workunits: Iterable[MetadataWorkUnit]) -> Tuple[int, int]: + peak_memory_usage = psutil.Process(os.getpid()).memory_info().rss + i: int = 0 + for i, wu in enumerate(workunits): + if i % 10_000 == 0: + peak_memory_usage = max( + peak_memory_usage, psutil.Process(os.getpid()).memory_info().rss + ) + peak_memory_usage = max( + peak_memory_usage, psutil.Process(os.getpid()).memory_info().rss + ) + + return i, peak_memory_usage diff --git a/metadata-ingestion/tests/unit/test_bigquery_usage.py b/metadata-ingestion/tests/unit/test_bigquery_usage.py index e06c6fb3fe7e5..1eb5d8b00e27c 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_usage.py +++ b/metadata-ingestion/tests/unit/test_bigquery_usage.py @@ -35,7 +35,7 @@ TimeWindowSizeClass, ) from datahub.testing.compare_metadata_json import diff_metadata_json -from tests.performance.bigquery import generate_events, ref_from_table +from tests.performance.bigquery.bigquery_events import generate_events, ref_from_table from tests.performance.data_generation import generate_data, generate_queries from tests.performance.data_model import Container, FieldAccess, Query, Table, View @@ -45,14 +45,15 @@ ACTOR_2, ACTOR_2_URN = "b@acryl.io", "urn:li:corpuser:b" DATABASE_1 = Container("database_1") DATABASE_2 = Container("database_2") -TABLE_1 = Table("table_1", DATABASE_1, ["id", "name", "age"]) -TABLE_2 = Table("table_2", DATABASE_1, ["id", "table_1_id", "value"]) +TABLE_1 = Table("table_1", DATABASE_1, ["id", "name", "age"], None) +TABLE_2 = Table("table_2", DATABASE_1, ["id", "table_1_id", "value"], None) VIEW_1 = View( name="view_1", container=DATABASE_1, columns=["id", "name", "total"], definition="VIEW DEFINITION 1", parents=[TABLE_1, TABLE_2], + column_mapping=None, ) ALL_TABLES = [TABLE_1, TABLE_2, VIEW_1] From 165aa54d1e6f1a1707f79be3cce39ec06c8a1652 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Wed, 4 Oct 2023 19:24:04 +0200 Subject: [PATCH 04/98] doc: DataHubUpgradeHistory_v1 (#8918) --- docs/deploy/confluent-cloud.md | 5 +++++ docs/how/kafka-config.md | 17 +++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/deploy/confluent-cloud.md b/docs/deploy/confluent-cloud.md index 794b55d4686bf..096fd9984f474 100644 --- 
a/docs/deploy/confluent-cloud.md +++ b/docs/deploy/confluent-cloud.md @@ -16,6 +16,11 @@ First, you'll need to create following new topics in the [Confluent Control Cent 6. (Deprecated) **MetadataChangeEvent_v4**: Metadata change proposal messages 7. (Deprecated) **MetadataAuditEvent_v4**: Metadata change log messages 8. (Deprecated) **FailedMetadataChangeEvent_v4**: Failed to process #1 event +9. **MetadataGraphEvent_v4**: +10. **MetadataGraphEvent_v4**: +11. **PlatformEvent_v1** +12. **DataHubUpgradeHistory_v1**: Notifies the end of DataHub Upgrade job so dependants can act accordingly (_eg_, startup). + Note this topic requires special configuration: **Infinite retention**. Also, 1 partition is enough for the occasional traffic. The first five are the most important, and are explained in more depth in [MCP/MCL](../advanced/mcp-mcl.md). The final topics are those which are deprecated but still used under certain circumstances. It is likely that in the future they will be completely diff --git a/docs/how/kafka-config.md b/docs/how/kafka-config.md index f3f81c3d07c01..2f20e8b548f83 100644 --- a/docs/how/kafka-config.md +++ b/docs/how/kafka-config.md @@ -52,16 +52,21 @@ Also see [Kafka Connect Security](https://docs.confluent.io/current/connect/secu By default, DataHub relies on the a set of Kafka topics to operate. By default, they have the following names: -- **MetadataChangeProposal_v1** -- **FailedMetadataChangeProposal_v1** -- **MetadataChangeLog_Versioned_v1** -- **MetadataChangeLog_Timeseries_v1** -- **DataHubUsageEvent_v1**: User behavior tracking event for UI +1. **MetadataChangeProposal_v1** +2. **FailedMetadataChangeProposal_v1** +3. **MetadataChangeLog_Versioned_v1** +4. **MetadataChangeLog_Timeseries_v1** +5. **DataHubUsageEvent_v1**: User behavior tracking event for UI 6. (Deprecated) **MetadataChangeEvent_v4**: Metadata change proposal messages 7. (Deprecated) **MetadataAuditEvent_v4**: Metadata change log messages 8. (Deprecated) **FailedMetadataChangeEvent_v4**: Failed to process #1 event +9. **MetadataGraphEvent_v4**: +10. **MetadataGraphEvent_v4**: +11. **PlatformEvent_v1**: +12. **DataHubUpgradeHistory_v1**: Notifies the end of DataHub Upgrade job so dependants can act accordingly (_eg_, startup). + Note this topic requires special configuration: **Infinite retention**. Also, 1 partition is enough for the occasional traffic. -These topics are discussed at more length in [Metadata Events](../what/mxe.md). +How Metadata Events relate to these topics is discussed at more length in [Metadata Events](../what/mxe.md). We've included environment variables to customize the name each of these topics, for cases where an organization has naming rules for your topics. From 3a9452c2072c95cbd7a4bf1270b4ef07abd1b1eb Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Thu, 5 Oct 2023 03:42:00 +0900 Subject: [PATCH 05/98] fix: fix typo on aws guide (#8944) --- docs/deploy/aws.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/deploy/aws.md b/docs/deploy/aws.md index 228fcb51d1a28..e0f57b4a0b0cb 100644 --- a/docs/deploy/aws.md +++ b/docs/deploy/aws.md @@ -100,7 +100,7 @@ eksctl create iamserviceaccount \ Install the TargetGroupBinding custom resource definition by running the following. 
``` -kubectl apply -k "github.com/aws/eks-charts/stable/aws-load-balancer-controller//crds?ref=master" +kubectl apply -k "github.com/aws/eks-charts/stable/aws-load-balancer-controller/crds?ref=master" ``` Add the helm chart repository containing the latest version of the ALB controller. From e2afd44bfeb287e8365b99bc7677d06e4172643b Mon Sep 17 00:00:00 2001 From: ethan-cartwright Date: Wed, 4 Oct 2023 16:38:58 -0400 Subject: [PATCH 06/98] feat(dbt-ingestion): add documentation link from dbt source to institutionalMemory (#8686) Co-authored-by: Ethan Cartwright Co-authored-by: Harshal Sheth --- .../docs/sources/dbt/dbt-cloud_recipe.yml | 8 +-- metadata-ingestion/docs/sources/dbt/dbt.md | 7 ++ .../ingestion/source/dbt/dbt_common.py | 6 ++ .../src/datahub/utilities/mapping.py | 67 ++++++++++++++++++- metadata-ingestion/tests/unit/test_mapping.py | 41 ++++++++++++ 5 files changed, 123 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/docs/sources/dbt/dbt-cloud_recipe.yml b/metadata-ingestion/docs/sources/dbt/dbt-cloud_recipe.yml index 113303cfc1ad4..ef0776b189ca9 100644 --- a/metadata-ingestion/docs/sources/dbt/dbt-cloud_recipe.yml +++ b/metadata-ingestion/docs/sources/dbt/dbt-cloud_recipe.yml @@ -6,14 +6,14 @@ source: # In the URL https://cloud.getdbt.com/next/deploy/107298/projects/175705/jobs/148094, # 107298 is the account_id, 175705 is the project_id, and 148094 is the job_id - account_id: # set to your dbt cloud account id - project_id: # set to your dbt cloud project id - job_id: # set to your dbt cloud job id + account_id: "${DBT_ACCOUNT_ID}" # set to your dbt cloud account id + project_id: "${DBT_PROJECT_ID}" # set to your dbt cloud project id + job_id: "${DBT_JOB_ID}" # set to your dbt cloud job id run_id: # set to your dbt cloud run id. This is optional, and defaults to the latest run target_platform: postgres # Options - target_platform: "my_target_platform_id" # e.g. bigquery/postgres/etc. + target_platform: "${TARGET_PLATFORM_ID}" # e.g. bigquery/postgres/etc. # sink configs diff --git a/metadata-ingestion/docs/sources/dbt/dbt.md b/metadata-ingestion/docs/sources/dbt/dbt.md index bfc3ebd5bb350..43ced13c3b1f8 100644 --- a/metadata-ingestion/docs/sources/dbt/dbt.md +++ b/metadata-ingestion/docs/sources/dbt/dbt.md @@ -38,6 +38,12 @@ meta_mapping: operation: "add_terms" config: separator: "," + documentation_link: + match: "(?:https?)?\:\/\/\w*[^#]*" + operation: "add_doc_link" + config: + link: {{ $match }} + description: "Documentation Link" column_meta_mapping: terms_list: match: ".*" @@ -57,6 +63,7 @@ We support the following operations: 2. add_term - Requires `term` property in config. 3. add_terms - Accepts an optional `separator` property in config. 4. add_owner - Requires `owner_type` property in config which can be either user or group. Optionally accepts the `owner_category` config property which you can set to one of `['TECHNICAL_OWNER', 'BUSINESS_OWNER', 'DATA_STEWARD', 'DATAOWNER'` (defaults to `DATAOWNER`). +5. add_doc_link - Requires `link` and `description` properties in config. Upon ingestion run, this will overwrite current links in the institutional knowledge section with this new link. The anchor text is defined here in the meta_mappings as `description`. 
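For illustration, a minimal Python sketch of the new `add_doc_link` operation using the `OperationProcessor` helper that backs dbt meta mapping; the regex and values mirror the example config above, and the URL is a placeholder:

```python
from datahub.utilities.mapping import OperationProcessor

processor = OperationProcessor(
    operation_defs={
        "documentation_link": {
            "match": r"(?:https?)?\:\/\/\w*[^#]*",
            "operation": "add_doc_link",
            "config": {"link": "{{ $match }}", "description": "Documentation Link"},
        },
    },
)

# The matched portion of the URL (everything before '#') becomes the link, and the
# configured description becomes the anchor text of the institutional memory element.
aspect_map = processor.process(
    {"documentation_link": "https://example.com/docs#ignore-this-fragment"}
)
institutional_memory = aspect_map["add_doc_link"]
```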
Note: diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 782d94f39e8a5..3edeb695e9f21 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -1188,9 +1188,15 @@ def _generate_base_aspects( ): aspects.append(meta_aspects.get(Constants.ADD_TERM_OPERATION)) + # add meta links aspect + meta_links_aspect = meta_aspects.get(Constants.ADD_DOC_LINK_OPERATION) + if meta_links_aspect and self.config.enable_meta_mapping: + aspects.append(meta_links_aspect) + # add schema metadata aspect schema_metadata = self.get_schema_metadata(self.report, node, mce_platform) aspects.append(schema_metadata) + return aspects def get_schema_metadata( diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index 793eccfb22c7e..eb2d975ee607f 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -2,12 +2,16 @@ import logging import operator import re +import time from functools import reduce -from typing import Any, Dict, List, Match, Optional, Union +from typing import Any, Dict, List, Match, Optional, Union, cast from datahub.emitter import mce_builder from datahub.emitter.mce_builder import OwnerType from datahub.metadata.schema_classes import ( + AuditStampClass, + InstitutionalMemoryClass, + InstitutionalMemoryMetadataClass, OwnerClass, OwnershipClass, OwnershipSourceClass, @@ -39,6 +43,7 @@ def _insert_match_value(original_value: str, match_value: str) -> str: class Constants: + ADD_DOC_LINK_OPERATION = "add_doc_link" ADD_TAG_OPERATION = "add_tag" ADD_TERM_OPERATION = "add_term" ADD_TERMS_OPERATION = "add_terms" @@ -47,6 +52,8 @@ class Constants: OPERATION_CONFIG = "config" TAG = "tag" TERM = "term" + DOC_LINK = "link" + DOC_DESCRIPTION = "description" OWNER_TYPE = "owner_type" OWNER_CATEGORY = "owner_category" MATCH = "match" @@ -163,7 +170,6 @@ def process(self, raw_props: Dict[str, Any]) -> Dict[str, Any]: ) operations_value_list.append(operation) # type: ignore operations_map[operation_type] = operations_value_list - aspect_map = self.convert_to_aspects(operations_map) except Exception as e: logger.error(f"Error while processing operation defs over raw_props: {e}") @@ -173,6 +179,7 @@ def convert_to_aspects( self, operation_map: Dict[str, Union[set, list]] ) -> Dict[str, Any]: aspect_map: Dict[str, Any] = {} + if Constants.ADD_TAG_OPERATION in operation_map: tag_aspect = mce_builder.make_global_tag_aspect_with_tag_list( sorted(operation_map[Constants.ADD_TAG_OPERATION]) @@ -195,11 +202,57 @@ def convert_to_aspects( ] ) aspect_map[Constants.ADD_OWNER_OPERATION] = owner_aspect + if Constants.ADD_TERM_OPERATION in operation_map: term_aspect = mce_builder.make_glossary_terms_aspect_from_urn_list( sorted(operation_map[Constants.ADD_TERM_OPERATION]) ) aspect_map[Constants.ADD_TERM_OPERATION] = term_aspect + + if Constants.ADD_DOC_LINK_OPERATION in operation_map: + try: + if len( + operation_map[Constants.ADD_DOC_LINK_OPERATION] + ) == 1 and isinstance( + operation_map[Constants.ADD_DOC_LINK_OPERATION], list + ): + docs_dict = cast( + List[Dict], operation_map[Constants.ADD_DOC_LINK_OPERATION] + )[0] + if "description" not in docs_dict or "link" not in docs_dict: + raise Exception( + "Documentation_link meta_mapping config needs a description key and a link key" + ) + + now = 
int(time.time() * 1000) # milliseconds since epoch + institutional_memory_element = InstitutionalMemoryMetadataClass( + url=docs_dict["link"], + description=docs_dict["description"], + createStamp=AuditStampClass( + time=now, actor="urn:li:corpuser:ingestion" + ), + ) + + # create a new institutional memory aspect + institutional_memory_aspect = InstitutionalMemoryClass( + elements=[institutional_memory_element] + ) + + aspect_map[ + Constants.ADD_DOC_LINK_OPERATION + ] = institutional_memory_aspect + else: + raise Exception( + f"Expected 1 item of type list for the documentation_link meta_mapping config," + f" received type of {type(operation_map[Constants.ADD_DOC_LINK_OPERATION])}" + f", and size of {len(operation_map[Constants.ADD_DOC_LINK_OPERATION])}." + ) + + except Exception as e: + logger.error( + f"Error while constructing aspect for documentation link and description : {e}" + ) + return aspect_map def get_operation_value( @@ -248,6 +301,16 @@ def get_operation_value( term = operation_config[Constants.TERM] term = _insert_match_value(term, _get_best_match(match, "term")) return mce_builder.make_term_urn(term) + elif ( + operation_type == Constants.ADD_DOC_LINK_OPERATION + and operation_config[Constants.DOC_LINK] + and operation_config[Constants.DOC_DESCRIPTION] + ): + link = operation_config[Constants.DOC_LINK] + link = _insert_match_value(link, _get_best_match(match, "link")) + description = operation_config[Constants.DOC_DESCRIPTION] + return {"link": link, "description": description} + elif operation_type == Constants.ADD_TERMS_OPERATION: separator = operation_config.get(Constants.SEPARATOR, ",") captured_terms = match.group(0) diff --git a/metadata-ingestion/tests/unit/test_mapping.py b/metadata-ingestion/tests/unit/test_mapping.py index d69dd4a8a96b0..5c258f16535f8 100644 --- a/metadata-ingestion/tests/unit/test_mapping.py +++ b/metadata-ingestion/tests/unit/test_mapping.py @@ -4,6 +4,7 @@ from datahub.metadata.schema_classes import ( GlobalTagsClass, GlossaryTermsClass, + InstitutionalMemoryClass, OwnerClass, OwnershipClass, OwnershipSourceTypeClass, @@ -233,6 +234,46 @@ def test_operation_processor_advanced_matching_tags(): assert tag_aspect.tags[0].tag == "urn:li:tag:case_4567" +def test_operation_processor_institutional_memory(): + raw_props = { + "documentation_link": "https://test.com/documentation#ignore-this", + } + processor = OperationProcessor( + operation_defs={ + "documentation_link": { + "match": r"(?:https?)?\:\/\/\w*[^#]*", + "operation": "add_doc_link", + "config": {"link": "{{ $match }}", "description": "test"}, + }, + }, + ) + aspect_map = processor.process(raw_props) + assert "add_doc_link" in aspect_map + + doc_link_aspect: InstitutionalMemoryClass = aspect_map["add_doc_link"] + + assert doc_link_aspect.elements[0].url == "https://test.com/documentation" + assert doc_link_aspect.elements[0].description == "test" + + +def test_operation_processor_institutional_memory_no_description(): + raw_props = { + "documentation_link": "test.com/documentation#ignore-this", + } + processor = OperationProcessor( + operation_defs={ + "documentation_link": { + "match": r"(?:https?)?\:\/\/\w*[^#]*", + "operation": "add_doc_link", + "config": {"link": "{{ $match }}"}, + }, + }, + ) + # we require a description, so this should stay empty + aspect_map = processor.process(raw_props) + assert aspect_map == {} + + def test_operation_processor_matching_nested_props(): raw_props = { "gdpr": { From 0f8d2757352597ceaed62b93547381255dbc650e Mon Sep 17 00:00:00 2001 From: John Joyce 
Date: Wed, 4 Oct 2023 20:03:40 -0700 Subject: [PATCH 07/98] refactor(style): Improve search bar input focus + styling (#8955) --- .../src/app/search/SearchBar.tsx | 46 +++++++++++-------- .../src/app/shared/admin/HeaderLinks.tsx | 28 +++++------ .../src/conf/theme/theme_dark.config.json | 4 +- .../src/conf/theme/theme_light.config.json | 4 +- 4 files changed, 46 insertions(+), 36 deletions(-) diff --git a/datahub-web-react/src/app/search/SearchBar.tsx b/datahub-web-react/src/app/search/SearchBar.tsx index fb10e1ca0026e..b4699994bc460 100644 --- a/datahub-web-react/src/app/search/SearchBar.tsx +++ b/datahub-web-react/src/app/search/SearchBar.tsx @@ -6,7 +6,7 @@ import { useHistory } from 'react-router'; import { AutoCompleteResultForEntity, EntityType, FacetFilterInput, ScenarioType } from '../../types.generated'; import EntityRegistry from '../entity/EntityRegistry'; import filterSearchQuery from './utils/filterSearchQuery'; -import { ANTD_GRAY, ANTD_GRAY_V2 } from '../entity/shared/constants'; +import { ANTD_GRAY, ANTD_GRAY_V2, REDESIGN_COLORS } from '../entity/shared/constants'; import { getEntityPath } from '../entity/shared/containers/profile/utils'; import { EXACT_SEARCH_PREFIX } from './utils/constants'; import { useListRecommendationsQuery } from '../../graphql/recommendations.generated'; @@ -20,7 +20,6 @@ import RecommendedOption from './autoComplete/RecommendedOption'; import SectionHeader, { EntityTypeLabel } from './autoComplete/SectionHeader'; import { useUserContext } from '../context/useUserContext'; import { navigateToSearchUrl } from './utils/navigateToSearchUrl'; -import { getQuickFilterDetails } from './autoComplete/quickFilters/utils'; import ViewAllSearchItem from './ViewAllSearchItem'; import { ViewSelect } from '../entity/view/select/ViewSelect'; import { combineSiblingsInAutoComplete } from './utils/combineSiblingsInAutoComplete'; @@ -39,13 +38,14 @@ const StyledSearchBar = styled(Input)` &&& { border-radius: 70px; height: 40px; - font-size: 20px; - color: ${ANTD_GRAY[7]}; - background-color: ${ANTD_GRAY_V2[2]}; - } - > .ant-input { font-size: 14px; + color: ${ANTD_GRAY[7]}; background-color: ${ANTD_GRAY_V2[2]}; + border: 2px solid transparent; + + &:focus-within { + border: 1.5px solid ${REDESIGN_COLORS.BLUE}; + } } > .ant-input::placeholder { color: ${ANTD_GRAY_V2[10]}; @@ -203,23 +203,16 @@ export const SearchBar = ({ const { quickFilters, selectedQuickFilter, setSelectedQuickFilter } = useQuickFiltersContext(); const autoCompleteQueryOptions = useMemo(() => { - const query = suggestions.length ? effectiveQuery : ''; - const selectedQuickFilterLabel = - showQuickFilters && selectedQuickFilter - ? getQuickFilterDetails(selectedQuickFilter, entityRegistry).label - : ''; - const text = query || selectedQuickFilterLabel; - - if (!text) return []; + if (effectiveQuery === '') return []; return [ { - value: `${EXACT_SEARCH_PREFIX}${text}`, - label: , + value: `${EXACT_SEARCH_PREFIX}${effectiveQuery}`, + label: , type: EXACT_AUTOCOMPLETE_OPTION_TYPE, }, ]; - }, [showQuickFilters, suggestions.length, effectiveQuery, selectedQuickFilter, entityRegistry]); + }, [effectiveQuery]); const autoCompleteEntityOptions = useMemo(() => { return suggestions.map((suggestion: AutoCompleteResultForEntity) => { @@ -296,6 +289,22 @@ export const SearchBar = ({ } } + const searchInputRef = useRef(null); + + useEffect(() => { + const handleKeyDown = (event) => { + // Support command-k to select the search bar. 
+ // 75 is the keyCode for 'k' + if ((event.metaKey || event.ctrlKey) && event.keyCode === 75) { + (searchInputRef?.current as any)?.focus(); + } + }; + document.addEventListener('keydown', handleKeyDown); + return () => { + document.removeEventListener('keydown', handleKeyDown); + }; + }, []); + return ( } + ref={searchInputRef} /> diff --git a/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx b/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx index ced7d8642576b..ce1ad93565ba4 100644 --- a/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx +++ b/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx @@ -93,20 +93,6 @@ export function HeaderLinks(props: Props) { )} - {showIngestion && ( - - - - - - )} + {showIngestion && ( + + + + + + )} {showSettings && ( diff --git a/datahub-web-react/src/conf/theme/theme_dark.config.json b/datahub-web-react/src/conf/theme/theme_dark.config.json index 9746c3ddde5f3..54ebebd3b692b 100644 --- a/datahub-web-react/src/conf/theme/theme_dark.config.json +++ b/datahub-web-react/src/conf/theme/theme_dark.config.json @@ -30,7 +30,7 @@ "homepageMessage": "Find data you can count(*) on" }, "search": { - "searchbarMessage": "Search Datasets, People, & more..." + "searchbarMessage": "Search Tables, Dashboards, People, & more..." }, "menu": { "items": [ @@ -52,4 +52,4 @@ ] } } -} +} \ No newline at end of file diff --git a/datahub-web-react/src/conf/theme/theme_light.config.json b/datahub-web-react/src/conf/theme/theme_light.config.json index 906c04e38a1ba..6b9ef3eac52b0 100644 --- a/datahub-web-react/src/conf/theme/theme_light.config.json +++ b/datahub-web-react/src/conf/theme/theme_light.config.json @@ -33,7 +33,7 @@ "homepageMessage": "Find data you can count on" }, "search": { - "searchbarMessage": "Search Datasets, People, & more..." + "searchbarMessage": "Search Tables, Dashboards, People, & more..." 
}, "menu": { "items": [ @@ -60,4 +60,4 @@ ] } } -} +} \ No newline at end of file From 817c371fbf8f8287480a2150925e9526a28f1f6e Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 4 Oct 2023 23:11:06 -0400 Subject: [PATCH 08/98] feat: data contracts models + CLI (#8923) Co-authored-by: Shirshanka Das Co-authored-by: John Joyce --- .../linkedin/datahub/graphql/TestUtils.java | 3 + .../test/resources/test-entity-registry.yaml | 8 + .../pet_of_the_week.dhub.dc.yaml | 21 ++ .../api/entities/datacontract/__init__.py | 0 .../datacontract/data_quality_assertion.py | 107 +++++++++ .../api/entities/datacontract/datacontract.py | 213 ++++++++++++++++++ .../datacontract/freshness_assertion.py | 86 +++++++ .../entities/datacontract/schema_assertion.py | 81 +++++++ .../datahub/cli/specific/datacontract_cli.py | 80 +++++++ .../src/datahub/cli/specific/file_loader.py | 26 +-- .../src/datahub/emitter/mce_builder.py | 24 +- .../src/datahub/emitter/mcp_builder.py | 27 +-- metadata-ingestion/src/datahub/entrypoints.py | 2 + .../src/datahub/ingestion/api/closeable.py | 8 +- .../ingestion/source/dbt/dbt_common.py | 28 ++- .../integrations/great_expectations/action.py | 19 +- .../tests/unit/test_mcp_builder.py | 3 +- .../linkedin/assertion/AssertionAction.pdl | 22 ++ .../linkedin/assertion/AssertionActions.pdl | 18 ++ .../com/linkedin/assertion/AssertionInfo.pdl | 49 +++- .../linkedin/assertion/AssertionResult.pdl | 18 +- .../assertion/AssertionResultError.pdl | 45 ++++ .../linkedin/assertion/AssertionRunEvent.pdl | 57 +++-- .../linkedin/assertion/AssertionSource.pdl | 27 +++ .../assertion/AssertionStdAggregation.pdl | 10 +- .../assertion/AssertionValueChangeType.pdl | 16 ++ .../com/linkedin/assertion/AuditLogSpec.pdl | 18 ++ .../assertion/DatasetAssertionInfo.pdl | 19 +- .../assertion/FixedIntervalSchedule.pdl | 10 + .../assertion/FreshnessAssertionInfo.pdl | 53 +++++ .../assertion/FreshnessAssertionSchedule.pdl | 66 ++++++ .../assertion/FreshnessCronSchedule.pdl | 25 ++ .../linkedin/assertion/FreshnessFieldKind.pdl | 17 ++ .../linkedin/assertion/FreshnessFieldSpec.pdl | 14 ++ .../IncrementingSegmentFieldTransformer.pdl | 60 +++++ .../IncrementingSegmentRowCountChange.pdl | 33 +++ .../IncrementingSegmentRowCountTotal.pdl | 27 +++ .../assertion/IncrementingSegmentSpec.pdl | 33 +++ .../com/linkedin/assertion/RowCountChange.pdl | 27 +++ .../com/linkedin/assertion/RowCountTotal.pdl | 22 ++ .../assertion/SchemaAssertionInfo.pdl | 29 +++ .../assertion/VolumeAssertionInfo.pdl | 82 +++++++ .../datacontract/DataContractProperties.pdl | 59 +++++ .../datacontract/DataContractStatus.pdl | 27 +++ .../datacontract/DataQualityContract.pdl | 16 ++ .../datacontract/FreshnessContract.pdl | 13 ++ .../linkedin/datacontract/SchemaContract.pdl | 13 ++ .../com/linkedin/dataset/DatasetFilter.pdl | 30 +++ .../linkedin/metadata/key/DataContractKey.pdl | 14 ++ .../com/linkedin/schema/SchemaFieldSpec.pdl | 21 ++ .../src/main/resources/entity-registry.yml | 9 + 51 files changed, 1641 insertions(+), 94 deletions(-) create mode 100644 metadata-ingestion/examples/data_contract/pet_of_the_week.dhub.dc.yaml create mode 100644 metadata-ingestion/src/datahub/api/entities/datacontract/__init__.py create mode 100644 metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py create mode 100644 metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py create mode 100644 metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py create mode 100644 
metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py create mode 100644 metadata-ingestion/src/datahub/cli/specific/datacontract_cli.py create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionAction.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionActions.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultError.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionSource.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionValueChangeType.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AuditLogSpec.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FixedIntervalSchedule.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionInfo.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionSchedule.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessCronSchedule.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldKind.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldSpec.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentFieldTransformer.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountChange.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountTotal.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentSpec.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountChange.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountTotal.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/SchemaAssertionInfo.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/VolumeAssertionInfo.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractProperties.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractStatus.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/datacontract/DataQualityContract.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/datacontract/FreshnessContract.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/datacontract/SchemaContract.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetFilter.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataContractKey.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/schema/SchemaFieldSpec.pdl diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java index 272a93fa1989c..606123cac926d 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java @@ -8,6 +8,7 @@ import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.UrnUtils; +import 
com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -21,6 +22,8 @@ public class TestUtils { public static EntityService getMockEntityService() { + PathSpecBasedSchemaAnnotationVisitor.class.getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); EntityRegistry registry = new ConfigEntityRegistry(TestUtils.class.getResourceAsStream("/test-entity-registry.yaml")); EntityService mockEntityService = Mockito.mock(EntityService.class); Mockito.when(mockEntityService.getEntityRegistry()).thenReturn(registry); diff --git a/datahub-graphql-core/src/test/resources/test-entity-registry.yaml b/datahub-graphql-core/src/test/resources/test-entity-registry.yaml index d694ae53ac42f..efd75a7fb07f5 100644 --- a/datahub-graphql-core/src/test/resources/test-entity-registry.yaml +++ b/datahub-graphql-core/src/test/resources/test-entity-registry.yaml @@ -181,6 +181,7 @@ entities: - assertionInfo - dataPlatformInstance - assertionRunEvent + - assertionActions - status - name: dataHubRetention category: internal @@ -292,4 +293,11 @@ entities: aspects: - ownershipTypeInfo - status +- name: dataContract + category: core + keyAspect: dataContractKey + aspects: + - dataContractProperties + - dataContractStatus + - status events: diff --git a/metadata-ingestion/examples/data_contract/pet_of_the_week.dhub.dc.yaml b/metadata-ingestion/examples/data_contract/pet_of_the_week.dhub.dc.yaml new file mode 100644 index 0000000000000..c73904403f678 --- /dev/null +++ b/metadata-ingestion/examples/data_contract/pet_of_the_week.dhub.dc.yaml @@ -0,0 +1,21 @@ +# id: pet_details_dc # Optional: This is the unique identifier for the data contract +display_name: Data Contract for SampleHiveDataset +entity: urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD) +freshness: + time: 0700 + granularity: DAILY +schema: + properties: + field_foo: + type: string + native_type: VARCHAR(100) + field_bar: + type: boolean + required: + - field_bar +data_quality: + - type: column_range + config: + column: field_foo + min: 0 + max: 100 diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/__init__.py b/metadata-ingestion/src/datahub/api/entities/datacontract/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py new file mode 100644 index 0000000000000..a665e95e93c43 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py @@ -0,0 +1,107 @@ +from typing import List, Optional, Union + +import pydantic +from typing_extensions import Literal + +import datahub.emitter.mce_builder as builder +from datahub.configuration.common import ConfigModel +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionStdAggregationClass, + AssertionStdOperatorClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionStdParameterTypeClass, + AssertionTypeClass, + DatasetAssertionInfoClass, + DatasetAssertionScopeClass, +) + + +class IdConfigMixin(ConfigModel): + id_raw: Optional[str] = pydantic.Field( + default=None, + alias="id", + 
description="The id of the assertion. If not provided, one will be generated using the type.", + ) + + def generate_default_id(self) -> str: + raise NotImplementedError + + +class CustomSQLAssertion(IdConfigMixin, ConfigModel): + type: Literal["custom_sql"] + + sql: str + + def generate_dataset_assertion_info( + self, entity_urn: str + ) -> DatasetAssertionInfoClass: + return DatasetAssertionInfoClass( + dataset=entity_urn, + scope=DatasetAssertionScopeClass.UNKNOWN, + fields=[], + operator=AssertionStdOperatorClass._NATIVE_, + aggregation=AssertionStdAggregationClass._NATIVE_, + logic=self.sql, + ) + + +class ColumnUniqueAssertion(IdConfigMixin, ConfigModel): + type: Literal["unique"] + + # TODO: support multiple columns? + column: str + + def generate_default_id(self) -> str: + return f"{self.type}-{self.column}" + + def generate_dataset_assertion_info( + self, entity_urn: str + ) -> DatasetAssertionInfoClass: + return DatasetAssertionInfoClass( + dataset=entity_urn, + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + fields=[builder.make_schema_field_urn(entity_urn, self.column)], + operator=AssertionStdOperatorClass.EQUAL_TO, + aggregation=AssertionStdAggregationClass.UNIQUE_PROPOTION, # purposely using the misspelled version to work with gql + parameters=AssertionStdParametersClass( + value=AssertionStdParameterClass( + value="1", type=AssertionStdParameterTypeClass.NUMBER + ) + ), + ) + + +class DataQualityAssertion(ConfigModel): + __root__: Union[ + CustomSQLAssertion, + ColumnUniqueAssertion, + ] = pydantic.Field(discriminator="type") + + @property + def id(self) -> str: + if self.__root__.id_raw: + return self.__root__.id_raw + try: + return self.__root__.generate_default_id() + except NotImplementedError: + return self.__root__.type + + def generate_mcp( + self, assertion_urn: str, entity_urn: str + ) -> List[MetadataChangeProposalWrapper]: + dataset_assertion_info = self.__root__.generate_dataset_assertion_info( + entity_urn + ) + + return [ + MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + aspect=AssertionInfoClass( + type=AssertionTypeClass.DATASET, + datasetAssertion=dataset_assertion_info, + ), + ) + ] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py new file mode 100644 index 0000000000000..2df446623a9d6 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py @@ -0,0 +1,213 @@ +import collections +from typing import Iterable, List, Optional, Tuple + +import pydantic +from ruamel.yaml import YAML +from typing_extensions import Literal + +import datahub.emitter.mce_builder as builder +from datahub.api.entities.datacontract.data_quality_assertion import ( + DataQualityAssertion, +) +from datahub.api.entities.datacontract.freshness_assertion import FreshnessAssertion +from datahub.api.entities.datacontract.schema_assertion import SchemaAssertion +from datahub.configuration.common import ConfigModel +from datahub.emitter.mce_builder import datahub_guid, make_assertion_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + DataContractPropertiesClass, + DataContractStateClass, + DataContractStatusClass, + DataQualityContractClass, + FreshnessContractClass, + SchemaContractClass, + StatusClass, +) +from datahub.utilities.urns.urn import guess_entity_type + + +class DataContract(ConfigModel): + """A yml representation of a Data Contract. 
+ + This model is used as a simpler, Python-native representation of a DataHub data contract. + It can be easily parsed from a YAML file, and can be easily converted into series of MCPs + that can be emitted to DataHub. + """ + + version: Literal[1] + + id: Optional[str] = pydantic.Field( + default=None, + alias="urn", + description="The data contract urn. If not provided, one will be generated.", + ) + entity: str = pydantic.Field( + description="The entity urn that the Data Contract is associated with" + ) + # TODO: add support for properties + # properties: Optional[Dict[str, str]] = None + + schema_field: Optional[SchemaAssertion] = pydantic.Field( + default=None, alias="schema" + ) + + freshness: Optional[FreshnessAssertion] = pydantic.Field(default=None) + + # TODO: Add a validator to ensure that ids are unique + data_quality: Optional[List[DataQualityAssertion]] = None + + _original_yaml_dict: Optional[dict] = None + + @pydantic.validator("data_quality") + def validate_data_quality( + cls, data_quality: Optional[List[DataQualityAssertion]] + ) -> Optional[List[DataQualityAssertion]]: + if data_quality: + # Raise an error if there are duplicate ids. + id_counts = collections.Counter(dq_check.id for dq_check in data_quality) + duplicates = [id for id, count in id_counts.items() if count > 1] + + if duplicates: + raise ValueError( + f"Got multiple data quality tests with the same type or ID: {duplicates}. Set a unique ID for each data quality test." + ) + + return data_quality + + @property + def urn(self) -> str: + if self.id: + assert guess_entity_type(self.id) == "dataContract" + return self.id + + # Data contract urns are stable + guid_obj = {"entity": self.entity} + urn = f"urn:li:dataContract:{datahub_guid(guid_obj)}" + return urn + + def _generate_freshness_assertion( + self, freshness: FreshnessAssertion + ) -> Tuple[str, List[MetadataChangeProposalWrapper]]: + guid_dict = { + "contract": self.urn, + "entity": self.entity, + "freshness": freshness.id, + } + assertion_urn = builder.make_assertion_urn(builder.datahub_guid(guid_dict)) + + return ( + assertion_urn, + freshness.generate_mcp(assertion_urn, self.entity), + ) + + def _generate_schema_assertion( + self, schema_metadata: SchemaAssertion + ) -> Tuple[str, List[MetadataChangeProposalWrapper]]: + # ingredients for guid -> the contract id, the fact that this is a schema assertion and the entity on which the assertion is made + guid_dict = { + "contract": self.urn, + "entity": self.entity, + "schema": schema_metadata.id, + } + assertion_urn = make_assertion_urn(datahub_guid(guid_dict)) + + return ( + assertion_urn, + schema_metadata.generate_mcp(assertion_urn, self.entity), + ) + + def _generate_data_quality_assertion( + self, data_quality: DataQualityAssertion + ) -> Tuple[str, List[MetadataChangeProposalWrapper]]: + guid_dict = { + "contract": self.urn, + "entity": self.entity, + "data_quality": data_quality.id, + } + assertion_urn = make_assertion_urn(datahub_guid(guid_dict)) + + return ( + assertion_urn, + data_quality.generate_mcp(assertion_urn, self.entity), + ) + + def _generate_dq_assertions( + self, data_quality_spec: List[DataQualityAssertion] + ) -> Tuple[List[str], List[MetadataChangeProposalWrapper]]: + assertion_urns = [] + assertion_mcps = [] + + for dq_check in data_quality_spec: + assertion_urn, assertion_mcp = self._generate_data_quality_assertion( + dq_check + ) + + assertion_urns.append(assertion_urn) + assertion_mcps.extend(assertion_mcp) + + return (assertion_urns, assertion_mcps) + + def generate_mcp( + 
self, + ) -> Iterable[MetadataChangeProposalWrapper]: + schema_assertion_urn = None + if self.schema_field is not None: + ( + schema_assertion_urn, + schema_assertion_mcps, + ) = self._generate_schema_assertion(self.schema_field) + yield from schema_assertion_mcps + + freshness_assertion_urn = None + if self.freshness: + ( + freshness_assertion_urn, + sla_assertion_mcps, + ) = self._generate_freshness_assertion(self.freshness) + yield from sla_assertion_mcps + + dq_assertions, dq_assertion_mcps = self._generate_dq_assertions( + self.data_quality or [] + ) + yield from dq_assertion_mcps + + # Now that we've generated the assertions, we can generate + # the actual data contract. + yield from MetadataChangeProposalWrapper.construct_many( + entityUrn=self.urn, + aspects=[ + DataContractPropertiesClass( + entity=self.entity, + schema=[SchemaContractClass(assertion=schema_assertion_urn)] + if schema_assertion_urn + else None, + freshness=[ + FreshnessContractClass(assertion=freshness_assertion_urn) + ] + if freshness_assertion_urn + else None, + dataQuality=[ + DataQualityContractClass(assertion=dq_assertion_urn) + for dq_assertion_urn in dq_assertions + ], + ), + # Also emit status. + StatusClass(removed=False), + # Emit the contract state as PENDING. + DataContractStatusClass(state=DataContractStateClass.PENDING) + if True + else None, + ], + ) + + @classmethod + def from_yaml( + cls, + file: str, + ) -> "DataContract": + with open(file) as fp: + yaml = YAML(typ="rt") # default, if not specfied, is 'rt' (round-trip) + orig_dictionary = yaml.load(fp) + parsed_data_contract = DataContract.parse_obj(orig_dictionary) + parsed_data_contract._original_yaml_dict = orig_dictionary + return parsed_data_contract diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py new file mode 100644 index 0000000000000..ee8fa1181e614 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +from datetime import timedelta +from typing import List, Union + +import pydantic +from typing_extensions import Literal + +from datahub.configuration.common import ConfigModel +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionTypeClass, + CalendarIntervalClass, + FixedIntervalScheduleClass, + FreshnessAssertionInfoClass, + FreshnessAssertionScheduleClass, + FreshnessAssertionScheduleTypeClass, + FreshnessAssertionTypeClass, + FreshnessCronScheduleClass, +) + + +class CronFreshnessAssertion(ConfigModel): + type: Literal["cron"] + + cron: str = pydantic.Field( + description="The cron expression to use. See https://crontab.guru/ for help." + ) + timezone: str = pydantic.Field( + "UTC", + description="The timezone to use for the cron schedule. 
Defaults to UTC.", + ) + + +class FixedIntervalFreshnessAssertion(ConfigModel): + type: Literal["interval"] + + interval: timedelta + + +class FreshnessAssertion(ConfigModel): + __root__: Union[ + CronFreshnessAssertion, FixedIntervalFreshnessAssertion + ] = pydantic.Field(discriminator="type") + + @property + def id(self): + return self.__root__.type + + def generate_mcp( + self, assertion_urn: str, entity_urn: str + ) -> List[MetadataChangeProposalWrapper]: + freshness = self.__root__ + + if isinstance(freshness, CronFreshnessAssertion): + schedule = FreshnessAssertionScheduleClass( + type=FreshnessAssertionScheduleTypeClass.CRON, + cron=FreshnessCronScheduleClass( + cron=freshness.cron, + timezone=freshness.timezone, + ), + ) + elif isinstance(freshness, FixedIntervalFreshnessAssertion): + schedule = FreshnessAssertionScheduleClass( + type=FreshnessAssertionScheduleTypeClass.FIXED_INTERVAL, + fixedInterval=FixedIntervalScheduleClass( + unit=CalendarIntervalClass.SECOND, + multiple=int(freshness.interval.total_seconds()), + ), + ) + else: + raise ValueError(f"Unknown freshness type {freshness}") + + assertionInfo = AssertionInfoClass( + type=AssertionTypeClass.FRESHNESS, + freshnessAssertion=FreshnessAssertionInfoClass( + entity=entity_urn, + type=FreshnessAssertionTypeClass.DATASET_CHANGE, + schedule=schedule, + ), + ) + + return [ + MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=assertionInfo) + ] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py new file mode 100644 index 0000000000000..b5b592e01f58f --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import json +from typing import List, Union + +import pydantic +from typing_extensions import Literal + +from datahub.configuration.common import ConfigModel +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.extractor.json_schema_util import get_schema_metadata +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionTypeClass, + SchemaAssertionInfoClass, + SchemaFieldClass, + SchemalessClass, + SchemaMetadataClass, +) + + +class JsonSchemaContract(ConfigModel): + type: Literal["json-schema"] + + json_schema: dict = pydantic.Field(alias="json-schema") + + _schema_metadata: SchemaMetadataClass + + def _init_private_attributes(self) -> None: + super()._init_private_attributes() + self._schema_metadata = get_schema_metadata( + platform="urn:li:dataPlatform:datahub", + name="", + json_schema=self.json_schema, + raw_schema_string=json.dumps(self.json_schema), + ) + + +class FieldListSchemaContract(ConfigModel, arbitrary_types_allowed=True): + type: Literal["field-list"] + + fields: List[SchemaFieldClass] + + _schema_metadata: SchemaMetadataClass + + def _init_private_attributes(self) -> None: + super()._init_private_attributes() + self._schema_metadata = SchemaMetadataClass( + schemaName="", + platform="urn:li:dataPlatform:datahub", + version=0, + hash="", + platformSchema=SchemalessClass(), + fields=self.fields, + ) + + +class SchemaAssertion(ConfigModel): + __root__: Union[JsonSchemaContract, FieldListSchemaContract] = pydantic.Field( + discriminator="type" + ) + + @property + def id(self): + return self.__root__.type + + def generate_mcp( + self, assertion_urn: str, entity_urn: str + ) -> List[MetadataChangeProposalWrapper]: + schema_metadata = 
self.__root__._schema_metadata + + assertionInfo = AssertionInfoClass( + type=AssertionTypeClass.DATA_SCHEMA, + schemaAssertion=SchemaAssertionInfoClass( + entity=entity_urn, schema=schema_metadata + ), + ) + + return [ + MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=assertionInfo) + ] diff --git a/metadata-ingestion/src/datahub/cli/specific/datacontract_cli.py b/metadata-ingestion/src/datahub/cli/specific/datacontract_cli.py new file mode 100644 index 0000000000000..3745943c8c96a --- /dev/null +++ b/metadata-ingestion/src/datahub/cli/specific/datacontract_cli.py @@ -0,0 +1,80 @@ +import logging +from typing import Optional + +import click +from click_default_group import DefaultGroup + +from datahub.api.entities.datacontract.datacontract import DataContract +from datahub.ingestion.graph.client import get_default_graph +from datahub.telemetry import telemetry +from datahub.upgrade import upgrade + +logger = logging.getLogger(__name__) + + +@click.group(cls=DefaultGroup, default="upsert") +def datacontract() -> None: + """A group of commands to interact with the DataContract entity in DataHub.""" + pass + + +@datacontract.command() +@click.option("-f", "--file", required=True, type=click.Path(exists=True)) +@upgrade.check_upgrade +@telemetry.with_telemetry() +def upsert(file: str) -> None: + """Upsert (create or update) a Data Contract in DataHub.""" + + data_contract: DataContract = DataContract.from_yaml(file) + urn = data_contract.urn + + with get_default_graph() as graph: + if not graph.exists(data_contract.entity): + raise ValueError( + f"Cannot define a data contract for non-existent entity {data_contract.entity}" + ) + + try: + for mcp in data_contract.generate_mcp(): + graph.emit(mcp) + click.secho(f"Update succeeded for urn {urn}.", fg="green") + except Exception as e: + logger.exception(e) + click.secho( + f"Update failed for {urn}: {e}", + fg="red", + ) + + +@datacontract.command() +@click.option( + "--urn", required=False, type=str, help="The urn for the data contract to delete" +) +@click.option( + "-f", + "--file", + required=False, + type=click.Path(exists=True), + help="The file containing the data contract definition", +) +@click.option("--hard/--soft", required=False, is_flag=True, default=False) +@upgrade.check_upgrade +@telemetry.with_telemetry() +def delete(urn: Optional[str], file: Optional[str], hard: bool) -> None: + """Delete a Data Contract in DataHub. Defaults to a soft-delete. 
Use --hard to completely erase metadata.""" + + if not urn: + if not file: + raise click.UsageError( + "Must provide either an urn or a file to delete a data contract" + ) + + data_contract = DataContract.from_yaml(file) + urn = data_contract.urn + + with get_default_graph() as graph: + if not graph.exists(urn): + raise ValueError(f"Data Contract {urn} does not exist") + + graph.delete_entity(urn, hard=hard) + click.secho(f"Data Contract {urn} deleted") diff --git a/metadata-ingestion/src/datahub/cli/specific/file_loader.py b/metadata-ingestion/src/datahub/cli/specific/file_loader.py index 54f12e024d294..a9787343fdb91 100644 --- a/metadata-ingestion/src/datahub/cli/specific/file_loader.py +++ b/metadata-ingestion/src/datahub/cli/specific/file_loader.py @@ -1,9 +1,7 @@ -import io from pathlib import Path from typing import Union -from datahub.configuration.common import ConfigurationError -from datahub.configuration.yaml import YamlConfigurationMechanism +from datahub.configuration.config_loader import load_config_file def load_file(config_file: Path) -> Union[dict, list]: @@ -17,19 +15,11 @@ def load_file(config_file: Path) -> Union[dict, list]: evolve to becoming a standard function that all the specific. cli variants will use to load up the models from external files """ - if not isinstance(config_file, Path): - config_file = Path(config_file) - if not config_file.is_file(): - raise ConfigurationError(f"Cannot open config file {config_file}") - if config_file.suffix in {".yaml", ".yml"}: - config_mech: YamlConfigurationMechanism = YamlConfigurationMechanism() - else: - raise ConfigurationError( - f"Only .yaml and .yml are supported. Cannot process file type {config_file.suffix}" - ) - - raw_config_file = config_file.read_text() - config_fp = io.StringIO(raw_config_file) - raw_config = config_mech.load_config(config_fp) - return raw_config + res = load_config_file( + config_file, + squirrel_original_config=False, + resolve_env_vars=False, + allow_stdin=False, + ) + return res diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index 0928818c7005c..64c9ec1bb5704 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -1,11 +1,11 @@ """Convenience functions for creating MCEs""" +import hashlib import json import logging import os import re import time from enum import Enum -from hashlib import md5 from typing import ( TYPE_CHECKING, Any, @@ -21,7 +21,6 @@ import typing_inspect from datahub.configuration.source_common import DEFAULT_ENV as DEFAULT_ENV_CONFIGURATION -from datahub.emitter.serialization_helper import pre_json_transform from datahub.metadata.schema_classes import ( AssertionKeyClass, AuditStampClass, @@ -159,11 +158,24 @@ def container_urn_to_key(guid: str) -> Optional[ContainerKeyClass]: return None +class _DatahubKeyJSONEncoder(json.JSONEncoder): + # overload method default + def default(self, obj: Any) -> Any: + if hasattr(obj, "guid"): + return obj.guid() + # Call the default method for other types + return json.JSONEncoder.default(self, obj) + + def datahub_guid(obj: dict) -> str: - obj_str = json.dumps( - pre_json_transform(obj), separators=(",", ":"), sort_keys=True - ).encode("utf-8") - return md5(obj_str).hexdigest() + json_key = json.dumps( + obj, + separators=(",", ":"), + sort_keys=True, + cls=_DatahubKeyJSONEncoder, + ) + md5_hash = hashlib.md5(json_key.encode("utf-8")) + return str(md5_hash.hexdigest()) def 
make_assertion_urn(assertion_id: str) -> str: diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 7419577b367aa..06f689dfd317b 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -1,11 +1,10 @@ -import hashlib -import json -from typing import Any, Dict, Iterable, List, Optional, TypeVar +from typing import Dict, Iterable, List, Optional, TypeVar from pydantic.fields import Field from pydantic.main import BaseModel from datahub.emitter.mce_builder import ( + datahub_guid, make_container_urn, make_data_platform_urn, make_dataplatform_instance_urn, @@ -33,24 +32,13 @@ ) -def _stable_guid_from_dict(d: dict) -> str: - json_key = json.dumps( - d, - separators=(",", ":"), - sort_keys=True, - cls=DatahubKeyJSONEncoder, - ) - md5_hash = hashlib.md5(json_key.encode("utf-8")) - return str(md5_hash.hexdigest()) - - class DatahubKey(BaseModel): def guid_dict(self) -> Dict[str, str]: return self.dict(by_alias=True, exclude_none=True) def guid(self) -> str: bag = self.guid_dict() - return _stable_guid_from_dict(bag) + return datahub_guid(bag) class ContainerKey(DatahubKey): @@ -137,15 +125,6 @@ def as_urn(self) -> str: ) -class DatahubKeyJSONEncoder(json.JSONEncoder): - # overload method default - def default(self, obj: Any) -> Any: - if hasattr(obj, "guid"): - return obj.guid() - # Call the default method for other types - return json.JSONEncoder.default(self, obj) - - KeyType = TypeVar("KeyType", bound=ContainerKey) diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py index 84615fd9a6148..5bfab3b841fa3 100644 --- a/metadata-ingestion/src/datahub/entrypoints.py +++ b/metadata-ingestion/src/datahub/entrypoints.py @@ -21,6 +21,7 @@ from datahub.cli.ingest_cli import ingest from datahub.cli.migrate import migrate from datahub.cli.put_cli import put +from datahub.cli.specific.datacontract_cli import datacontract from datahub.cli.specific.dataproduct_cli import dataproduct from datahub.cli.specific.group_cli import group from datahub.cli.specific.user_cli import user @@ -158,6 +159,7 @@ def init() -> None: datahub.add_command(user) datahub.add_command(group) datahub.add_command(dataproduct) +datahub.add_command(datacontract) try: from datahub.cli.lite_cli import lite diff --git a/metadata-ingestion/src/datahub/ingestion/api/closeable.py b/metadata-ingestion/src/datahub/ingestion/api/closeable.py index 523174b9978b3..80a5008ed6368 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/closeable.py +++ b/metadata-ingestion/src/datahub/ingestion/api/closeable.py @@ -1,7 +1,9 @@ from abc import abstractmethod from contextlib import AbstractContextManager from types import TracebackType -from typing import Optional, Type +from typing import Optional, Type, TypeVar + +_Self = TypeVar("_Self", bound="Closeable") class Closeable(AbstractContextManager): @@ -9,6 +11,10 @@ class Closeable(AbstractContextManager): def close(self) -> None: pass + def __enter__(self: _Self) -> _Self: + # This method is mainly required for type checking. 
+ return self + def __exit__( self, exc_type: Optional[Type[BaseException]], diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 3edeb695e9f21..f9b71892975b4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -701,18 +701,22 @@ def create_test_entity_mcps( assertion_urn = mce_builder.make_assertion_urn( mce_builder.datahub_guid( { - "platform": DBT_PLATFORM, - "name": node.dbt_name, - "instance": self.config.platform_instance, - **( - # Ideally we'd include the env unconditionally. However, we started out - # not including env in the guid, so we need to maintain backwards compatibility - # with existing PROD assertions. - {"env": self.config.env} - if self.config.env != mce_builder.DEFAULT_ENV - and self.config.include_env_in_assertion_guid - else {} - ), + k: v + for k, v in { + "platform": DBT_PLATFORM, + "name": node.dbt_name, + "instance": self.config.platform_instance, + **( + # Ideally we'd include the env unconditionally. However, we started out + # not including env in the guid, so we need to maintain backwards compatibility + # with existing PROD assertions. + {"env": self.config.env} + if self.config.env != mce_builder.DEFAULT_ENV + and self.config.include_env_in_assertion_guid + else {} + ), + }.items() + if v is not None } ) ) diff --git a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py index f116550328819..8b393a8f6f1c6 100644 --- a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py +++ b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py @@ -35,6 +35,7 @@ from datahub.cli.cli_utils import get_boolean_env_variable from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.emitter.serialization_helper import pre_json_transform from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( get_platform_from_sqlalchemy_uri, ) @@ -253,13 +254,15 @@ def get_assertions_with_results( # possibly for each validation run assertionUrn = builder.make_assertion_urn( builder.datahub_guid( - { - "platform": GE_PLATFORM_NAME, - "nativeType": expectation_type, - "nativeParameters": kwargs, - "dataset": assertion_datasets[0], - "fields": assertion_fields, - } + pre_json_transform( + { + "platform": GE_PLATFORM_NAME, + "nativeType": expectation_type, + "nativeParameters": kwargs, + "dataset": assertion_datasets[0], + "fields": assertion_fields, + } + ) ) ) logger.debug( @@ -638,7 +641,7 @@ def get_dataset_partitions(self, batch_identifier, data_asset): ].batch_request.runtime_parameters["query"] partitionSpec = PartitionSpecClass( type=PartitionTypeClass.QUERY, - partition=f"Query_{builder.datahub_guid(query)}", + partition=f"Query_{builder.datahub_guid(pre_json_transform(query))}", ) batchSpec = BatchSpec( diff --git a/metadata-ingestion/tests/unit/test_mcp_builder.py b/metadata-ingestion/tests/unit/test_mcp_builder.py index 23f2bddc2084e..561b782ef9e46 100644 --- a/metadata-ingestion/tests/unit/test_mcp_builder.py +++ b/metadata-ingestion/tests/unit/test_mcp_builder.py @@ -1,5 +1,4 @@ import datahub.emitter.mcp_builder as builder -from datahub.emitter.mce_builder import datahub_guid def test_guid_generator(): @@ -80,7 +79,7 @@ def test_guid_generators(): key = 
builder.SchemaKey( database="test", schema="Test", platform="mysql", instance="TestInstance" ) - guid_datahub = datahub_guid(key.dict(by_alias=True)) + guid_datahub = key.guid() guid = key.guid() assert guid == guid_datahub diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionAction.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionAction.pdl new file mode 100644 index 0000000000000..df6620b66bfd8 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionAction.pdl @@ -0,0 +1,22 @@ +namespace com.linkedin.assertion + +/** + * The Actions about an Assertion. + * In the future, we'll likely extend this model to support additional + * parameters or options related to the assertion actions. + */ +record AssertionAction { + /** + * The type of the Action + */ + type: enum AssertionActionType { + /** + * Raise an incident. + */ + RAISE_INCIDENT + /** + * Resolve open incidents related to the assertion. + */ + RESOLVE_INCIDENT + } +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionActions.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionActions.pdl new file mode 100644 index 0000000000000..61846c1ba9c12 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionActions.pdl @@ -0,0 +1,18 @@ +namespace com.linkedin.assertion + +/** + * The Actions about an Assertion + */ +@Aspect = { + "name": "assertionActions" +} +record AssertionActions { + /** + * Actions to be executed on successful assertion run. + */ + onSuccess: array[AssertionAction] = [] + /** + * Actions to be executed on failed assertion run. + */ + onFailure: array[AssertionAction] = [] +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl index 77ee147a781e2..ae2a58028057b 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl @@ -13,13 +13,58 @@ record AssertionInfo includes CustomProperties, ExternalReference { /** * Type of assertion. Assertion types can evolve to span Datasets, Flows (Pipelines), Models, Features etc. */ + @Searchable = { } type: enum AssertionType { - // A single-dataset assertion. When this is the value, the datasetAssertion field will be populated. + /** + * A single-dataset assertion. When this is the value, the datasetAssertion field will be populated. + */ DATASET + + /** + * A freshness assertion, or an assertion which indicates when a particular operation should occur + * to an asset. + */ + FRESHNESS + + /** + * A volume assertion, or an assertion which indicates how much data should be available for a + * particular asset. + */ + VOLUME + + /** + * A schema or structural assertion. + * + * Would have named this SCHEMA but the codegen for PDL does not allow this (reserved word). + */ + DATA_SCHEMA } /** - * Dataset Assertion information when type is DATASET + * A Dataset Assertion definition. This field is populated when the type is DATASET. */ datasetAssertion: optional DatasetAssertionInfo + + /** + * An Freshness Assertion definition. This field is populated when the type is FRESHNESS. + */ + freshnessAssertion: optional FreshnessAssertionInfo + + /** + * An Volume Assertion definition. This field is populated when the type is VOLUME. 
+ */ + volumeAssertion: optional VolumeAssertionInfo + + /** + * An schema Assertion definition. This field is populated when the type is DATASET_SCHEMA + */ + schemaAssertion: optional SchemaAssertionInfo + + /** + * The source or origin of the Assertion definition. + * + * If the source type of the Assertion is EXTERNAL, it is expected to have a corresponding dataPlatformInstance aspect detailing + * the platform where it was ingested from. + */ + source: optional AssertionSource } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResult.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResult.pdl index decbfc08263de..ded84e1969153 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResult.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResult.pdl @@ -5,10 +5,15 @@ namespace com.linkedin.assertion */ record AssertionResult { /** - * The final result, e.g. either SUCCESS or FAILURE. + * The final result, e.g. either SUCCESS, FAILURE, or ERROR. */ @TimeseriesField = {} + @Searchable = {} type: enum AssertionResultType { + /** + * The Assertion has not yet been fully evaluated + */ + INIT /** * The Assertion Succeeded */ @@ -17,6 +22,10 @@ record AssertionResult { * The Assertion Failed */ FAILURE + /** + * The Assertion encountered an Error + */ + ERROR } /** @@ -45,8 +54,13 @@ record AssertionResult { nativeResults: optional map[string, string] /** - * URL where full results are available + * External URL where full results are available. Only present when assertion source is not native. */ externalUrl: optional string + /** + * The error object if AssertionResultType is an Error + */ + error: optional AssertionResultError + } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultError.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultError.pdl new file mode 100644 index 0000000000000..e768fe8521942 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultError.pdl @@ -0,0 +1,45 @@ +namespace com.linkedin.assertion + +/** + * An error encountered when evaluating an AssertionResult + */ +record AssertionResultError { + /** + * The type of error encountered + */ + type: enum AssertionResultErrorType { + /** + * Source is unreachable + */ + SOURCE_CONNECTION_ERROR + /** + * Source query failed to execute + */ + SOURCE_QUERY_FAILED + /** + * Insufficient data to evaluate the assertion + */ + INSUFFICIENT_DATA + /** + * Invalid parameters were detected + */ + INVALID_PARAMETERS + /** + * Event type not supported by the specified source + */ + INVALID_SOURCE_TYPE + /** + * Unsupported platform + */ + UNSUPPORTED_PLATFORM + /** + * Unknown error + */ + UNKNOWN_ERROR + } + + /** + * Additional metadata depending on the type of error + */ + properties: optional map[string, string] +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunEvent.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunEvent.pdl index 9e75f96fafd06..14f1204232740 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunEvent.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunEvent.pdl @@ -1,6 +1,7 @@ namespace com.linkedin.assertion -import com.linkedin.timeseries.TimeseriesAspectBase +import com.linkedin.timeseries.PartitionSpec +import 
com.linkedin.timeseries.TimeWindowSize import com.linkedin.common.ExternalReference import com.linkedin.common.Urn @@ -12,36 +13,31 @@ import com.linkedin.common.Urn "name": "assertionRunEvent", "type": "timeseries", } -record AssertionRunEvent includes TimeseriesAspectBase { +record AssertionRunEvent { + + /** + * The event timestamp field as epoch at UTC in milli seconds. + */ + @Searchable = { + "fieldName": "lastCompletedTime", + "fieldType": "DATETIME" + } + timestampMillis: long /** * Native (platform-specific) identifier for this run */ - //Multiple assertions could occur in same evaluator run runId: string - /* - * Urn of assertion which is evaluated - */ - @TimeseriesField = {} - assertionUrn: Urn - /* * Urn of entity on which the assertion is applicable */ - //example - dataset urn, if dataset is being asserted @TimeseriesField = {} asserteeUrn: Urn - - /** - * Specification of the batch which this run is evaluating - */ - batchSpec: optional BatchSpec /** * The status of the assertion run as per this timeseries event. */ - // Currently just supports COMPLETE, but should evolve to support other statuses like STARTED, RUNNING, etc. @TimeseriesField = {} status: enum AssertionRunStatus { /** @@ -59,4 +55,33 @@ record AssertionRunEvent includes TimeseriesAspectBase { * Runtime parameters of evaluation */ runtimeContext: optional map[string, string] + + /** + * Specification of the batch which this run is evaluating + */ + batchSpec: optional BatchSpec + + /* + * Urn of assertion which is evaluated + */ + @TimeseriesField = {} + assertionUrn: Urn + + /** + * Granularity of the event if applicable + */ + eventGranularity: optional TimeWindowSize + + /** + * The optional partition specification. + */ + partitionSpec: optional PartitionSpec = { + "type":"FULL_TABLE", + "partition":"FULL_TABLE_SNAPSHOT" + } + + /** + * The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value. + */ + messageId: optional string } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionSource.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionSource.pdl new file mode 100644 index 0000000000000..d8892c0c71c6f --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionSource.pdl @@ -0,0 +1,27 @@ +namespace com.linkedin.assertion + +/** + * The source of an assertion + */ +record AssertionSource { + /** + * The type of the Assertion Source + */ + @Searchable = { + "fieldName": "sourceType" + } + type: enum AssertionSourceType { + /** + * The assertion was defined natively on DataHub by a user. + */ + NATIVE + /** + * The assertion was defined and managed externally of DataHub. + */ + EXTERNAL + /** + * The assertion was inferred, e.g. from offline AI / ML models. + */ + INFERRED + } +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdAggregation.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdAggregation.pdl index b79b96f9379b0..968944165a1c8 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdAggregation.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdAggregation.pdl @@ -4,6 +4,7 @@ namespace com.linkedin.assertion * The function that is applied to the aggregation input (schema, rows, column values) before evaluating an operator. */ enum AssertionStdAggregation { + /** * Assertion is applied on number of rows. 
*/ @@ -20,7 +21,7 @@ enum AssertionStdAggregation { COLUMN_COUNT /** - * Assertion is applied on individual column value. + * Assertion is applied on individual column value. (No aggregation) */ IDENTITY @@ -42,6 +43,13 @@ enum AssertionStdAggregation { /** * Assertion is applied on proportion of distinct values in column */ + UNIQUE_PROPORTION + + /** + * Assertion is applied on proportion of distinct values in column + * + * Deprecated! Use UNIQUE_PROPORTION instead. + */ UNIQUE_PROPOTION /** diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionValueChangeType.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionValueChangeType.pdl new file mode 100644 index 0000000000000..5a1ff4fa73ffb --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionValueChangeType.pdl @@ -0,0 +1,16 @@ +namespace com.linkedin.assertion + +/** +* An enum to represent a type of change in an assertion value, metric, or measurement. +*/ +enum AssertionValueChangeType { + /** + * A change that is defined in absolute terms. + */ + ABSOLUTE + /** + * A change that is defined in relative terms using percentage change + * from the original value. + */ + PERCENTAGE +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AuditLogSpec.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AuditLogSpec.pdl new file mode 100644 index 0000000000000..4d5bf261cbf89 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AuditLogSpec.pdl @@ -0,0 +1,18 @@ +namespace com.linkedin.assertion + +import com.linkedin.schema.SchemaFieldDataType + +/** +* Information about the Audit Log operation to use in evaluating an assertion. +**/ +record AuditLogSpec { + /** + * The list of operation types that should be monitored. If not provided, a default set will be used. + */ + operationTypes: optional array [string] + + /** + * Optional: The user name associated with the operation. + */ + userName: optional string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/DatasetAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/DatasetAssertionInfo.pdl index c411c7ff8a572..2a8bf28f1ff11 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/DatasetAssertionInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/DatasetAssertionInfo.pdl @@ -18,9 +18,10 @@ record DatasetAssertionInfo { /** * Scope of the Assertion. What part of the dataset does this assertion apply to? **/ + @Searchable = {} scope: enum DatasetAssertionScope { /** - * This assertion applies to dataset columns + * This assertion applies to dataset column(s) */ DATASET_COLUMN @@ -29,6 +30,11 @@ record DatasetAssertionInfo { */ DATASET_ROWS + /** + * This assertion applies to the storage size of the dataset + */ + DATASET_STORAGE_SIZE + /** * This assertion applies to the schema of the dataset */ @@ -41,7 +47,9 @@ record DatasetAssertionInfo { } /** - * One or more dataset schema fields that are targeted by this assertion + * One or more dataset schema fields that are targeted by this assertion. + * + * This field is expected to be provided if the assertion scope is DATASET_COLUMN. 
*/ @Relationship = { "/*": { @@ -49,11 +57,18 @@ record DatasetAssertionInfo { "entityTypes": [ "schemaField" ] } } + @Searchable = { + "/*": { + "fieldType": "URN" + } + } fields: optional array[Urn] /** * Standardized assertion operator + * This field is left blank if there is no selected aggregation or metric for a particular column. */ + @Searchable = {} aggregation: optional AssertionStdAggregation /** diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FixedIntervalSchedule.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FixedIntervalSchedule.pdl new file mode 100644 index 0000000000000..c08c33ffb92d3 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FixedIntervalSchedule.pdl @@ -0,0 +1,10 @@ +namespace com.linkedin.assertion + +import com.linkedin.common.Urn +import com.linkedin.timeseries.TimeWindowSize + +/** +* Attributes defining a relative fixed interval SLA schedule. +*/ +record FixedIntervalSchedule includes TimeWindowSize { +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionInfo.pdl new file mode 100644 index 0000000000000..4445a11ff40a7 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionInfo.pdl @@ -0,0 +1,53 @@ +namespace com.linkedin.assertion + +import com.linkedin.common.Urn +import com.linkedin.dataset.DatasetFilter + +/** +* Attributes defining a Freshness Assertion. +**/ +record FreshnessAssertionInfo { + /** + * The type of the freshness assertion being monitored. + */ + @Searchable = {} + type: enum FreshnessAssertionType { + /** + * An Freshness based on Operations performed on a particular Dataset (insert, update, delete, etc) and sourced from an audit log, as + * opposed to based on the highest watermark in a timestamp column (e.g. a query). Only valid when entity is of type "dataset". + */ + DATASET_CHANGE + /** + * An Freshness based on a successful execution of a Data Job. + */ + DATA_JOB_RUN + } + + /** + * The entity targeted by this Freshness check. + */ + @Searchable = { + "fieldType": "URN" + } + @Relationship = { + "name": "Asserts", + "entityTypes": [ "dataset", "dataJob" ] + } + entity: Urn + + /** + * Produce FAILURE Assertion Result if the asset is not updated on the cadence and within the time range described by the schedule. + */ + @Searchable = { + "/type": { + "fieldName": "scheduleType" + } + } + schedule: FreshnessAssertionSchedule + + /** + * A definition of the specific filters that should be applied, when performing monitoring. + * If not provided, there is no filter, and the full table is under consideration. + */ + filter: optional DatasetFilter +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionSchedule.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionSchedule.pdl new file mode 100644 index 0000000000000..a87342ad4f5ed --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionSchedule.pdl @@ -0,0 +1,66 @@ +namespace com.linkedin.assertion + +import com.linkedin.common.Urn + +/** +* Attributes defining a single Freshness schedule. +*/ +record FreshnessAssertionSchedule { + + /** + * The type of a Freshness Assertion Schedule. + * + * Once we support data-time-relative schedules (e.g. 
schedules relative to time partitions), + * we will add those schedule types here. + */ + type: enum FreshnessAssertionScheduleType { + /** + * An highly configurable recurring schedule which describes the times of events described + * by a CRON schedule, with the evaluation schedule assuming to be matching the cron schedule. + * + * In a CRON schedule type, we compute the look-back window to be the time between the last scheduled event + * and the current event (evaluation time). This means that the evaluation schedule must match exactly + * the schedule defined inside the cron schedule. + * + * For example, a CRON schedule defined as "0 8 * * *" would represent a schedule of "every day by 8am". Assuming + * that the assertion evaluation schedule is defined to match this, the freshness assertion would be evaluated in the following way: + * + * 1. Compute the "last scheduled occurrence" of the event using the CRON schedule. For example, yesterday at 8am. + * 2. Compute the bounds of a time window between the "last scheduled occurrence" (yesterday at 8am) until the "current occurrence" (today at 8am) + * 3. Verify that the target event has occurred within the CRON-interval window. + * 4. If the target event has occurred within the time window, then assertion passes. + * 5. If the target event has not occurred within the time window, then the assertion fails. + * + */ + CRON + /** + * A fixed interval which is used to compute a look-back window for use when evaluating the assertion relative + * to the Evaluation Time of the Assertion. + * + * To compute the valid look-back window, we subtract the fixed interval from the evaluation time. Then, we verify + * that the target event has occurred within that window. + * + * For example, a fixed interval of "24h" would represent a schedule of "in the last 24 hours". + * The 24 hour interval is relative to the evaluation time of the assertion. For example if we schedule the assertion + * to be evaluated each hour, we'd compute the result as follows: + * + * 1. Subtract the fixed interval from the current time (Evaluation time) to compute the bounds of a fixed look-back window. + * 2. Verify that the target event has occurred within the CRON-interval window. + * 3. If the target event has occurred within the time window, then assertion passes. + * 4. If the target event has not occurred within the time window, then the assertion fails. + * + */ + FIXED_INTERVAL + } + + /** + * A cron schedule. This field is required when type is CRON. + */ + cron: optional FreshnessCronSchedule + + /** + * A fixed interval schedule. This field is required when type is FIXED_INTERVAL. + */ + fixedInterval: optional FixedIntervalSchedule + +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessCronSchedule.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessCronSchedule.pdl new file mode 100644 index 0000000000000..d48900690c51d --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessCronSchedule.pdl @@ -0,0 +1,25 @@ +namespace com.linkedin.assertion + +/** +* Attributes defining a CRON-formatted schedule used for defining a freshness assertion. +*/ +record FreshnessCronSchedule { + /** + * A cron-formatted execution interval, as a cron string, e.g. 1 * * * * + */ + cron: string + + /** + * Timezone in which the cron interval applies, e.g. 
America/Los Angeles + */ + timezone: string + + /** + * An optional offset in milliseconds to SUBTRACT from the timestamp generated by the cron schedule + * to generate the lower bounds of the "freshness window", or the window of time in which an event must have occurred in order for the Freshness check + * to be considering passing. + * + * If left empty, the start of the SLA window will be the _end_ of the previously evaluated Freshness window. + */ + windowStartOffsetMs: optional long +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldKind.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldKind.pdl new file mode 100644 index 0000000000000..7b25589e500da --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldKind.pdl @@ -0,0 +1,17 @@ +namespace com.linkedin.assertion + +enum FreshnessFieldKind { + /** + * Determine that a change has occurred by inspecting an last modified field which + * represents the last time at which a row was changed. + */ + LAST_MODIFIED, + /** + * Determine that a change has occurred by inspecting a field which should be tracked as the + * "high watermark" for the table. This should be an ascending number or date field. + * + * If rows with this column have not been added since the previous check + * then the Freshness Assertion will fail. + */ + HIGH_WATERMARK +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldSpec.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldSpec.pdl new file mode 100644 index 0000000000000..04acd1c71352d --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldSpec.pdl @@ -0,0 +1,14 @@ +namespace com.linkedin.assertion + +import com.linkedin.schema.SchemaFieldSpec + + +/** +* Lightweight spec used for referencing a particular schema field. +**/ +record FreshnessFieldSpec includes SchemaFieldSpec { + /** + * The type of the field being used to verify the Freshness Assertion. + */ + kind: optional FreshnessFieldKind +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentFieldTransformer.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentFieldTransformer.pdl new file mode 100644 index 0000000000000..d1d3e7b23b666 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentFieldTransformer.pdl @@ -0,0 +1,60 @@ +namespace com.linkedin.assertion + +/** +* The definition of the transformer function that should be applied to a given field / column value in a dataset +* in order to determine the segment or bucket that it belongs to, which in turn is used to evaluate +* volume assertions. +*/ +record IncrementingSegmentFieldTransformer { + /** + * A 'standard' transformer type. Note that not all source systems will support all operators. + */ + type: enum IncrementingSegmentFieldTransformerType { + /** + * Rounds a timestamp (in seconds) down to the start of the month. + */ + TIMESTAMP_MS_TO_MINUTE + + /** + * Rounds a timestamp (in milliseconds) down to the nearest hour. + */ + TIMESTAMP_MS_TO_HOUR + + /** + * Rounds a timestamp (in milliseconds) down to the start of the day. 
+ */ + TIMESTAMP_MS_TO_DATE + + /** + * Rounds a timestamp (in milliseconds) down to the start of the month + */ + TIMESTAMP_MS_TO_MONTH + + /** + * Rounds a timestamp (in milliseconds) down to the start of the year + */ + TIMESTAMP_MS_TO_YEAR + + /** + * Rounds a numeric value down to the nearest integer. + */ + FLOOR + + /** + * Rounds a numeric value up to the nearest integer. + */ + CEILING + + /** + * A backdoor to provide a native operator type specific to a given source system like + * Snowflake, Redshift, BQ, etc. + */ + NATIVE + } + + /** + * The 'native' transformer type, useful as a back door if a custom operator is required. + * This field is required if the type is NATIVE. + */ + nativeType: optional string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountChange.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountChange.pdl new file mode 100644 index 0000000000000..7c4c73f2ea887 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountChange.pdl @@ -0,0 +1,33 @@ +namespace com.linkedin.assertion + + +/** +* Attributes defining an INCREMENTING_SEGMENT_ROW_COUNT_CHANGE volume assertion. +*/ +record IncrementingSegmentRowCountChange { + /** + * A specification of how the 'segment' can be derived using a column and an optional transformer function. + */ + segment: IncrementingSegmentSpec + + /** + * The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage. + */ + type: AssertionValueChangeType + + /** + * The operator you'd like to apply to the row count value + * + * Note that only numeric operators are valid inputs: + * GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + * BETWEEN. + */ + operator: AssertionStdOperator + + /** + * The parameters you'd like to provide as input to the operator. + * + * Note that only numeric parameter types are valid inputs: NUMBER. + */ + parameters: AssertionStdParameters +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountTotal.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountTotal.pdl new file mode 100644 index 0000000000000..6b035107aae09 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountTotal.pdl @@ -0,0 +1,27 @@ +namespace com.linkedin.assertion + +/** +* Attributes defining an INCREMENTING_SEGMENT_ROW_COUNT_TOTAL volume assertion. +*/ +record IncrementingSegmentRowCountTotal { + /** + * A specification of how the 'segment' can be derived using a column and an optional transformer function. + */ + segment: IncrementingSegmentSpec + + /** + * The operator you'd like to apply. + * + * Note that only numeric operators are valid inputs: + * GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + * BETWEEN. + */ + operator: AssertionStdOperator + + /** + * The parameters you'd like to provide as input to the operator. + * + * Note that only numeric parameter types are valid inputs: NUMBER. 
+ */ + parameters: AssertionStdParameters +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentSpec.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentSpec.pdl new file mode 100644 index 0000000000000..eddd0c3da3df7 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentSpec.pdl @@ -0,0 +1,33 @@ +namespace com.linkedin.assertion + +import com.linkedin.schema.SchemaFieldSpec + +/** +* Core attributes required to identify an incrementing segment in a table. This type is mainly useful +* for tables that constantly increase with new rows being added on a particular cadence (e.g. fact or event tables) +* +* An incrementing segment represents a logical chunk of data which is INSERTED +* into a dataset on a regular interval, along with the presence of a constantly-incrementing column +* value such as an event time, date partition, or last modified column. +* +* An incrementing segment is principally identified by 2 key attributes combined: +* +* 1. A field or column that represents the incrementing value. New rows that are inserted will be identified using this column. +* Note that the value of this column may not by itself represent the "bucket" or the "segment" in which the row falls. +* +* 2. [Optional] An transformer function that may be applied to the selected column value in order +* to obtain the final "segment identifier" or "bucket identifier". Rows that have the same value after applying the transformation +* will be grouped into the same segment, using which the final value (e.g. row count) will be determined. +*/ +record IncrementingSegmentSpec { + /** + * The field to use to generate segments. It must be constantly incrementing as new rows are inserted. + */ + field: SchemaFieldSpec + + /** + * Optional transformer function to apply to the field in order to obtain the final segment or bucket identifier. + * If not provided, then no operator will be applied to the field. (identity function) + */ + transformer: optional IncrementingSegmentFieldTransformer +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountChange.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountChange.pdl new file mode 100644 index 0000000000000..85a915066f584 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountChange.pdl @@ -0,0 +1,27 @@ +namespace com.linkedin.assertion + +/** +* Attributes defining a ROW_COUNT_CHANGE volume assertion. +*/ +record RowCountChange { + /** + * The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage. + */ + type: AssertionValueChangeType + + /** + * The operator you'd like to apply. + * + * Note that only numeric operators are valid inputs: + * GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + * BETWEEN. + */ + operator: AssertionStdOperator + + /** + * The parameters you'd like to provide as input to the operator. + * + * Note that only numeric parameter types are valid inputs: NUMBER. 
+ */ + parameters: AssertionStdParameters +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountTotal.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountTotal.pdl new file mode 100644 index 0000000000000..f691f15f62e04 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountTotal.pdl @@ -0,0 +1,22 @@ +namespace com.linkedin.assertion + +/** +* Attributes defining a ROW_COUNT_TOTAL volume assertion. +*/ +record RowCountTotal { + /** + * The operator you'd like to apply. + * + * Note that only numeric operators are valid inputs: + * GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + * BETWEEN. + */ + operator: AssertionStdOperator + + /** + * The parameters you'd like to provide as input to the operator. + * + * Note that only numeric parameter types are valid inputs: NUMBER. + */ + parameters: AssertionStdParameters +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/SchemaAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/SchemaAssertionInfo.pdl new file mode 100644 index 0000000000000..fd246e0c7cfc4 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/SchemaAssertionInfo.pdl @@ -0,0 +1,29 @@ +namespace com.linkedin.assertion + +import com.linkedin.common.Urn +import com.linkedin.schema.SchemaMetadata + +/** +* Attributes that are applicable to schema assertions +**/ +record SchemaAssertionInfo { + /** + * The entity targeted by the assertion + */ + @Searchable = { + "fieldType": "URN" + } + @Relationship = { + "name": "Asserts", + "entityTypes": [ "dataset", "dataJob" ] + } + entity: Urn + + /** + * A definition of the expected structure for the asset + * + * Note that many of the fields of this model, especially those related to metadata (tags, terms) + * will go unused in this context. + */ + schema: SchemaMetadata +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/VolumeAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/VolumeAssertionInfo.pdl new file mode 100644 index 0000000000000..327b76f95762e --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/VolumeAssertionInfo.pdl @@ -0,0 +1,82 @@ +namespace com.linkedin.assertion + +import com.linkedin.common.Urn +import com.linkedin.dataset.DatasetFilter + +/** +* Attributes defining a dataset Volume Assertion +*/ +record VolumeAssertionInfo { + /** + * The type of the freshness assertion being monitored. + */ + @Searchable = {} + type: enum VolumeAssertionType { + /** + * A volume assertion that is evaluated against the total row count of a dataset. + */ + ROW_COUNT_TOTAL + /** + * A volume assertion that is evaluated against an incremental row count of a dataset, + * or a row count change. + */ + ROW_COUNT_CHANGE + /** + * A volume assertion that checks the latest "segment" in a table based on an incrementing + * column to check whether it's row count falls into a particular range. + * + * This can be used to monitor the row count of an incrementing date-partition column segment. + */ + INCREMENTING_SEGMENT_ROW_COUNT_TOTAL + /** + * A volume assertion that compares the row counts in neighboring "segments" or "partitions" + * of an incrementing column. + * This can be used to track changes between subsequent date partition + * in a table, for example. 
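As a rough illustration of how these new volume models might be exercised once this change lands, the sketch below assembles a ROW_COUNT_TOTAL assertion with the DataHub Python SDK. The `*Class` names simply mirror the PDL records in this patch, and the emitter setup, URNs, and the `AssertionTypeClass.VOLUME` enum value are assumptions to verify against your SDK version rather than a definitive recipe.

```python
# Hypothetical sketch: emit a ROW_COUNT_TOTAL volume assertion for a dataset.
# Class and field names mirror the PDL models in this patch; the assertion URN,
# dataset URN, and GMS endpoint are placeholders.
from datahub.emitter.mce_builder import make_dataset_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import (
    AssertionInfoClass,
    AssertionStdOperatorClass,
    AssertionStdParameterClass,
    AssertionStdParametersClass,
    AssertionStdParameterTypeClass,
    AssertionTypeClass,
    RowCountTotalClass,
    VolumeAssertionInfoClass,
    VolumeAssertionTypeClass,
)

dataset_urn = make_dataset_urn(platform="snowflake", name="db.schema.table", env="PROD")

# Fail the assertion unless the table contains at least 1000 rows.
volume_assertion = VolumeAssertionInfoClass(
    type=VolumeAssertionTypeClass.ROW_COUNT_TOTAL,
    entity=dataset_urn,
    rowCountTotal=RowCountTotalClass(
        operator=AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
        parameters=AssertionStdParametersClass(
            value=AssertionStdParameterClass(
                type=AssertionStdParameterTypeClass.NUMBER, value="1000"
            )
        ),
    ),
)

mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:assertion:my-volume-assertion",  # placeholder assertion id
    aspect=AssertionInfoClass(
        type=AssertionTypeClass.VOLUME,  # assumed enum value for volume assertions
        volumeAssertion=volume_assertion,
    ),
)
DatahubRestEmitter(gms_server="http://localhost:8080").emit(mcp)
```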
+ */ + INCREMENTING_SEGMENT_ROW_COUNT_CHANGE + } + + /** + * The entity targeted by this Volume check. + */ + @Searchable = { + "fieldType": "URN" + } + @Relationship = { + "name": "Asserts", + "entityTypes": [ "dataset" ] + } + entity: Urn + + /** + * Produce FAILURE Assertion Result if the row count of the asset does not meet specific requirements. + * Required if type is 'ROW_COUNT_TOTAL' + */ + rowCountTotal: optional RowCountTotal + + /** + * Produce FAILURE Assertion Result if the delta row count of the asset does not meet specific requirements + * within a given period of time. + * Required if type is 'ROW_COUNT_CHANGE' + */ + rowCountChange: optional RowCountChange + + /** + * Produce FAILURE Assertion Result if the asset's latest incrementing segment row count total + * does not meet specific requirements. Required if type is 'INCREMENTING_SEGMENT_ROW_COUNT_TOTAL' + */ + incrementingSegmentRowCountTotal: optional IncrementingSegmentRowCountTotal + + /** + * Produce FAILURE Assertion Result if the asset's incrementing segment row count delta + * does not meet specific requirements. Required if type is 'INCREMENTING_SEGMENT_ROW_COUNT_CHANGE' + */ + incrementingSegmentRowCountChange: optional IncrementingSegmentRowCountChange + + /** + * A definition of the specific filters that should be applied, when performing monitoring. + * If not provided, there is no filter, and the full table is under consideration. + */ + filter: optional DatasetFilter +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractProperties.pdl new file mode 100644 index 0000000000000..a623f585df30c --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractProperties.pdl @@ -0,0 +1,59 @@ +namespace com.linkedin.datacontract + +import com.linkedin.common.Urn + +/** + * Information about a data contract + */ +@Aspect = { + "name": "dataContractProperties" +} +record DataContractProperties { + /** + * The entity that this contract is associated with. Currently, we only support Dataset contracts, but + * in the future we may also support Data Product level contracts. + */ + @Relationship = { + "name": "ContractFor", + "entityTypes": [ "dataset" ] + } + entity: Urn + + /** + * An optional set of schema contracts. If this is a dataset contract, there will only be one. + */ + @Relationship = { + "/*/assertion": { + "name": "IncludesSchemaAssertion", + "entityTypes": [ "assertion" ] + } + } + schema: optional array[SchemaContract] + + /** + * An optional set of FRESHNESS contracts. If this is a dataset contract, there will only be one. + */ + @Relationship = { + "/*/assertion": { + "name": "IncludesFreshnessAssertion", + "entityTypes": [ "assertion" ] + } + } + freshness: optional array[FreshnessContract] + + /** + * An optional set of Data Quality contracts, e.g. table and column level contract constraints. 
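To make the accompanying data contract model more concrete, here is a hypothetical sketch of assembling a `dataContractProperties` aspect with the Python SDK; the class and field names mirror the PDL in this patch, while the URNs and the assumption that each referenced assertion already exists are placeholders rather than a prescribed workflow.

```python
# Hypothetical sketch: tie a dataset to existing schema / freshness / data quality
# assertions via a data contract. Class names mirror the PDL models in this patch;
# all URNs are placeholders and the referenced assertions are assumed to exist.
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import (
    DataContractPropertiesClass,
    DataQualityContractClass,
    FreshnessContractClass,
    SchemaContractClass,
)

dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.table,PROD)"

contract = DataContractPropertiesClass(
    entity=dataset_urn,
    schema=[SchemaContractClass(assertion="urn:li:assertion:schema-check")],
    freshness=[FreshnessContractClass(assertion="urn:li:assertion:freshness-check")],
    dataQuality=[DataQualityContractClass(assertion="urn:li:assertion:row-count-check")],
)

mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:dataContract:my-contract",  # placeholder contract id
    aspect=contract,
)
# mcp can then be emitted with the same REST emitter as any other aspect.
```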
+ */ + @Relationship = { + "/*/assertion": { + "name": "IncludesDataQualityAssertion", + "entityTypes": [ "assertion" ] + } + } + dataQuality: optional array[DataQualityContract] + + /** + * YAML-formatted contract definition + */ + rawContract: optional string +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractStatus.pdl b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractStatus.pdl new file mode 100644 index 0000000000000..d61fb191ae53d --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractStatus.pdl @@ -0,0 +1,27 @@ +namespace com.linkedin.datacontract + +import com.linkedin.common.Urn +import com.linkedin.common.CustomProperties + +/** + * Information about the status of a data contract + */ +@Aspect = { + "name": "dataContractStatus" +} +record DataContractStatus includes CustomProperties { + /** + * The latest state of the data contract + */ + @Searchable = {} + state: enum DataContractState { + /** + * The data contract is active. + */ + ACTIVE + /** + * The data contract is pending implementation. + */ + PENDING + } +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataQualityContract.pdl b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataQualityContract.pdl new file mode 100644 index 0000000000000..273d2c2a56f95 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataQualityContract.pdl @@ -0,0 +1,16 @@ +namespace com.linkedin.datacontract + +import com.linkedin.common.Urn + + +/** + * A data quality contract pertaining to a physical data asset + * Data Quality contracts are used to make assertions about data quality metrics for a physical data asset + */ +record DataQualityContract { + /** + * The assertion representing the Data Quality contract. + * E.g. a table or column-level assertion. + */ + assertion: Urn +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/datacontract/FreshnessContract.pdl b/metadata-models/src/main/pegasus/com/linkedin/datacontract/FreshnessContract.pdl new file mode 100644 index 0000000000000..8cfa66846d505 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/datacontract/FreshnessContract.pdl @@ -0,0 +1,13 @@ +namespace com.linkedin.datacontract + +import com.linkedin.common.Urn + +/** + * A contract pertaining to the operational SLAs of a physical data asset + */ +record FreshnessContract { + /** + * The assertion representing the SLA contract. + */ + assertion: Urn +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/datacontract/SchemaContract.pdl b/metadata-models/src/main/pegasus/com/linkedin/datacontract/SchemaContract.pdl new file mode 100644 index 0000000000000..6c11e0da5b128 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/datacontract/SchemaContract.pdl @@ -0,0 +1,13 @@ +namespace com.linkedin.datacontract + +import com.linkedin.common.Urn + +/** + * Expectations for a logical schema + */ +record SchemaContract { + /** + * The assertion representing the schema contract. 
+ */ + assertion: Urn +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetFilter.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetFilter.pdl new file mode 100644 index 0000000000000..6823398f79f3d --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetFilter.pdl @@ -0,0 +1,30 @@ +namespace com.linkedin.dataset + +/** + * A definition of filters that should be used when + * querying an external Dataset or Table. + * + * Note that this models should NOT be used for working with + * search / filter on DataHub Platform itself. + */ +record DatasetFilter { + /** + * How the partition will be represented in this model. + * + * In the future, we'll likely add support for more structured + * predicates. + */ + type: enum DatasetFilterType { + /** + * The partition is represented as a an opaque, raw SQL + * clause. + */ + SQL + } + + /** + * The raw where clause string which will be used for monitoring. + * Required if the type is SQL. + */ + sql: optional string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataContractKey.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataContractKey.pdl new file mode 100644 index 0000000000000..f1d4a709cd6bf --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataContractKey.pdl @@ -0,0 +1,14 @@ +namespace com.linkedin.metadata.key + +/** + * Key for a Data Contract + */ +@Aspect = { + "name": "dataContractKey" +} +record DataContractKey { + /** + * Unique id for the contract + */ + id: string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaFieldSpec.pdl b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaFieldSpec.pdl new file mode 100644 index 0000000000000..e875ff7a84403 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaFieldSpec.pdl @@ -0,0 +1,21 @@ +namespace com.linkedin.schema + +/** +* Lightweight spec used for referencing a particular schema field. +**/ +record SchemaFieldSpec { + /** + * The field path + */ + path: string + + /** + * The DataHub standard schema field type. 
+ */ + type: string + + /** + * The native field type + */ + nativeType: string +} \ No newline at end of file diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 56fc5f6568eb7..11d0f74305d7b 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -262,6 +262,7 @@ entities: - assertionInfo - dataPlatformInstance - assertionRunEvent + - assertionActions - status - name: dataHubRetention category: internal @@ -457,4 +458,12 @@ entities: aspects: - ownershipTypeInfo - status + - name: dataContract + category: core + keyAspect: dataContractKey + aspects: + - dataContractProperties + - dataContractStatus + - status + events: From 2bc685d3b98f879d1c3051a8484a78489359d910 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Thu, 5 Oct 2023 09:31:32 +0530 Subject: [PATCH 09/98] ci: tweak ci to decrease wait time of devs (#8945) --- .github/workflows/build-and-test.yml | 14 ++++++++++---- .github/workflows/metadata-ingestion.yml | 7 ++++--- .../integration/powerbi/test_admin_only_api.py | 3 +++ .../tests/integration/powerbi/test_m_parser.py | 2 +- .../tests/integration/powerbi/test_powerbi.py | 2 +- .../tests/integration/snowflake/test_snowflake.py | 4 ++-- .../integration/tableau/test_tableau_ingest.py | 2 +- .../tests/integration/trino/test_trino.py | 5 ++--- 8 files changed, 24 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 3f409878b191f..96b9bb2a14933 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -27,8 +27,8 @@ jobs: command: [ # metadata-ingestion and airflow-plugin each have dedicated build jobs - "./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:airflow-plugin:check -x :datahub-frontend:build -x :datahub-web-react:build --parallel", - "./gradlew :datahub-frontend:build :datahub-web-react:build --parallel", + "except_metadata_ingestion", + "frontend" ] timezone: [ @@ -53,9 +53,15 @@ jobs: with: python-version: "3.10" cache: pip - - name: Gradle build (and test) + - name: Gradle build (and test) for metadata ingestion + # we only need the timezone runs for frontend tests + if: ${{ matrix.command == 'except_metadata_ingestion' && matrix.timezone == 'America/New_York' }} run: | - ${{ matrix.command }} + ./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:airflow-plugin:check -x :datahub-frontend:build -x :datahub-web-react:build --parallel + - name: Gradle build (and test) for frontend + if: ${{ matrix.command == 'frontend' }} + run: | + ./gradlew :datahub-frontend:build :datahub-web-react:build --parallel env: NODE_OPTIONS: "--max-old-space-size=3072" - uses: actions/upload-artifact@v3 diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 8d56a0adf5bd5..dea4603868f8e 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -34,7 +34,6 @@ jobs: python-version: ["3.7", "3.10"] command: [ - "lint", "testQuick", "testIntegrationBatch0", 
"testIntegrationBatch1", @@ -54,6 +53,9 @@ jobs: run: ./metadata-ingestion/scripts/install_deps.sh - name: Install package run: ./gradlew :metadata-ingestion:installPackageOnly + - name: Run lint alongwith testQuick + if: ${{ matrix.command == 'testQuick' }} + run: ./gradlew :metadata-ingestion:lint - name: Run metadata-ingestion tests run: ./gradlew :metadata-ingestion:${{ matrix.command }} - name: Debug info @@ -65,7 +67,6 @@ jobs: docker image ls docker system df - uses: actions/upload-artifact@v3 - if: ${{ always() && matrix.command != 'lint' }} with: name: Test Results (metadata ingestion ${{ matrix.python-version }}) path: | @@ -73,7 +74,7 @@ jobs: **/build/test-results/test/** **/junit.*.xml - name: Upload coverage to Codecov - if: ${{ always() && matrix.python-version == '3.10' && matrix.command != 'lint' }} + if: ${{ always() && matrix.python-version == '3.10' }} uses: codecov/codecov-action@v3 with: token: ${{ secrets.CODECOV_TOKEN }} diff --git a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py index f95fd81681a9a..6f45dcf97f1dd 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py +++ b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py @@ -3,11 +3,14 @@ from typing import Any, Dict from unittest import mock +import pytest from freezegun import freeze_time from datahub.ingestion.run.pipeline import Pipeline from tests.test_helpers import mce_helpers +pytestmark = pytest.mark.integration_batch_2 + FROZEN_TIME = "2022-02-03 07:00:00" diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 2e9c02ef759a5..e3cc6c8101650 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -19,7 +19,7 @@ from datahub.ingestion.source.powerbi.m_query.resolver import DataPlatformTable, Lineage from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, DownstreamColumnRef -pytestmark = pytest.mark.slow +pytestmark = pytest.mark.integration_batch_2 M_QUERIES = [ 'let\n Source = Snowflake.Databases("bu10758.ap-unknown-2.fakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table', diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index b0695e3ea9954..7232d2a38da1d 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -21,7 +21,7 @@ ) from tests.test_helpers import mce_helpers -pytestmark = pytest.mark.slow +pytestmark = pytest.mark.integration_batch_2 FROZEN_TIME = "2022-02-03 07:00:00" diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py index dec50aefd19f0..2c77ace8b53e5 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py @@ -30,6 +30,8 @@ from tests.integration.snowflake.common import FROZEN_TIME, default_query_results from tests.test_helpers import mce_helpers +pytestmark = 
pytest.mark.integration_batch_2 + def random_email(): return ( @@ -55,7 +57,6 @@ def random_cloud_region(): ) -@pytest.mark.integration def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): test_resources_dir = pytestconfig.rootpath / "tests/integration/snowflake" @@ -183,7 +184,6 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): @freeze_time(FROZEN_TIME) -@pytest.mark.integration def test_snowflake_private_link(pytestconfig, tmp_path, mock_time, mock_datahub_graph): test_resources_dir = pytestconfig.rootpath / "tests/integration/snowflake" diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 53b8519a886d3..c31867f5aa904 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -757,7 +757,7 @@ def test_tableau_no_verify(): @freeze_time(FROZEN_TIME) -@pytest.mark.slow +@pytest.mark.integration_batch_2 def test_tableau_signout_timeout(pytestconfig, tmp_path, mock_datahub_graph): enable_logging() output_file_name: str = "tableau_signout_timeout_mces.json" diff --git a/metadata-ingestion/tests/integration/trino/test_trino.py b/metadata-ingestion/tests/integration/trino/test_trino.py index 22e5f6f91a06e..177c273c0d242 100644 --- a/metadata-ingestion/tests/integration/trino/test_trino.py +++ b/metadata-ingestion/tests/integration/trino/test_trino.py @@ -13,6 +13,8 @@ from tests.test_helpers import fs_helpers, mce_helpers from tests.test_helpers.docker_helpers import wait_for_port +pytestmark = pytest.mark.integration_batch_1 + FROZEN_TIME = "2021-09-23 12:00:00" data_platform = "trino" @@ -51,7 +53,6 @@ def loaded_trino(trino_runner): @freeze_time(FROZEN_TIME) -@pytest.mark.integration @pytest.mark.xfail def test_trino_ingest( loaded_trino, test_resources_dir, pytestconfig, tmp_path, mock_time @@ -111,7 +112,6 @@ def test_trino_ingest( @freeze_time(FROZEN_TIME) -@pytest.mark.integration def test_trino_hive_ingest( loaded_trino, test_resources_dir, pytestconfig, tmp_path, mock_time ): @@ -167,7 +167,6 @@ def test_trino_hive_ingest( @freeze_time(FROZEN_TIME) -@pytest.mark.integration def test_trino_instance_ingest( loaded_trino, test_resources_dir, pytestconfig, tmp_path, mock_time ): From 2fcced6db9d30228c421d0773c8249c889cd0d9f Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 5 Oct 2023 09:31:57 +0530 Subject: [PATCH 10/98] docs(ingest): add permissions required for athena ingestion (#8948) --- .../docs/sources/athena/athena_pre.md | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 metadata-ingestion/docs/sources/athena/athena_pre.md diff --git a/metadata-ingestion/docs/sources/athena/athena_pre.md b/metadata-ingestion/docs/sources/athena/athena_pre.md new file mode 100644 index 0000000000000..a56457d3f84fc --- /dev/null +++ b/metadata-ingestion/docs/sources/athena/athena_pre.md @@ -0,0 +1,72 @@ +### Prerequisities + +In order to execute this source, you will need to create a policy with below permissions and attach it to the the aws role or credentials used in ingestion recipe. 
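If you would rather script this than click through the AWS console, a minimal boto3 sketch along the following lines could create the policy from the JSON document below (after substituting the `${...}` placeholders) and attach it to the role used by your ingestion recipe; the role and policy names here are placeholders.

```python
# Rough sketch, assuming the policy JSON below has been saved to a local file
# with the ${...} placeholders already substituted. Names are placeholders.
import boto3

iam = boto3.client("iam")

with open("datahub_athena_policy.json") as f:
    policy_document = f.read()

created = iam.create_policy(
    PolicyName="datahub-athena-ingestion",  # placeholder policy name
    PolicyDocument=policy_document,
)
iam.attach_role_policy(
    RoleName="datahub-ingestion-role",  # placeholder: the role used in the recipe
    PolicyArn=created["Policy"]["Arn"],
)
```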
+ +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "VisualEditor0", + "Effect": "Allow", + "Action": [ + "athena:GetTableMetadata", + "athena:StartQueryExecution", + "athena:GetQueryResults", + "athena:GetDatabase", + "athena:ListDataCatalogs", + "athena:GetDataCatalog", + "athena:ListQueryExecutions", + "athena:GetWorkGroup", + "athena:StopQueryExecution", + "athena:GetQueryResultsStream", + "athena:ListDatabases", + "athena:GetQueryExecution", + "athena:ListTableMetadata", + "athena:BatchGetQueryExecution", + "glue:GetTables", + "glue:GetDatabases", + "glue:GetTable", + "glue:GetDatabase", + "glue:SearchTables", + "glue:GetTableVersions", + "glue:GetTableVersion", + "glue:GetPartition", + "glue:GetPartitions", + "s3:GetObject", + "s3:ListBucket", + "s3:GetBucketLocation", + ], + "Resource": [ + "arn:aws:athena:${region-id}:${account-id}:datacatalog/*", + "arn:aws:athena:${region-id}:${account-id}:workgroup/*", + "arn:aws:glue:${region-id}:${account-id}:tableVersion/*/*/*", + "arn:aws:glue:${region-id}:${account-id}:table/*/*", + "arn:aws:glue:${region-id}:${account-id}:catalog", + "arn:aws:glue:${region-id}:${account-id}:database/*", + "arn:aws:s3:::${datasets-bucket}", + "arn:aws:s3:::${datasets-bucket}/*" + ] + }, + { + "Sid": "VisualEditor1", + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:GetObject", + "s3:ListBucketMultipartUploads", + "s3:AbortMultipartUpload", + "s3:ListBucket", + "s3:GetBucketLocation", + "s3:ListMultipartUploadParts" + ], + "Resource": [ + "arn:aws:s3:::${athena-query-result-bucket}/*", + "arn:aws:s3:::${athena-query-result-bucket}" + ] + }, + ] +} +``` + +Replace `${var}` with appropriate values as per your athena setup. \ No newline at end of file From 6310e51eb09711e98d86625578127349c5144c66 Mon Sep 17 00:00:00 2001 From: Jinlin Yang <86577891+jinlintt@users.noreply.github.com> Date: Wed, 4 Oct 2023 21:03:31 -0700 Subject: [PATCH 11/98] feat(ingestion/dynamodb): implement pagination for list_tables (#8910) --- .../app/ingest/source/builder/sources.json | 4 +- .../docs/sources/dynamodb/dynamodb_post.md | 13 ++- .../docs/sources/dynamodb/dynamodb_pre.md | 6 +- .../docs/sources/dynamodb/dynamodb_recipe.yml | 16 ++-- .../ingestion/source/dynamodb/dynamodb.py | 85 +++++++++++-------- 5 files changed, 65 insertions(+), 59 deletions(-) diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index 1bd5b6f1f768b..b18384909c33f 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -130,7 +130,7 @@ "name": "dynamodb", "displayName": "DynamoDB", "docsUrl": "https://datahubproject.io/docs/metadata-ingestion/", - "recipe": "source:\n type: dynamodb\n config:\n platform_instance: \"AWS_ACCOUNT_ID\"\n aws_access_key_id : '${AWS_ACCESS_KEY_ID}'\n aws_secret_access_key : '${AWS_SECRET_ACCESS_KEY}'\n # User could use the below option to provide a list of primary keys of a table in dynamodb format,\n # those items from given primary keys will be included when we scan the table.\n # For each table we can retrieve up to 16 MB of data, which can contain as many as 100 items.\n # We'll enforce the the primary keys list size not to exceed 100\n # The total items we'll try to retrieve in these two scenarios:\n # 1. If user don't specify include_table_item: we'll retrieve up to 100 items\n # 2. 
If user specifies include_table_item: we'll retrieve up to 100 items plus user specified items in\n # the table, with a total not more than 200 items\n # include_table_item:\n # table_name:\n # [\n # {\n # 'partition_key_name': { 'attribute_type': 'attribute_value' },\n # 'sort_key_name': { 'attribute_type': 'attribute_value' },\n # },\n # ]" + "recipe": "source:\n type: dynamodb\n config:\n platform_instance: \"AWS_ACCOUNT_ID\"\n aws_access_key_id : '${AWS_ACCESS_KEY_ID}'\n aws_secret_access_key : '${AWS_SECRET_ACCESS_KEY}'\n # If there are items that have most representative fields of the table, users could use the\n # `include_table_item` option to provide a list of primary keys of the table in dynamodb format.\n # For each `region.table`, the list of primary keys can be at most 100.\n # We include these items in addition to the first 100 items in the table when we scan it.\n # include_table_item:\n # region.table_name:\n # [\n # {\n # 'partition_key_name': { 'attribute_type': 'attribute_value' },\n # 'sort_key_name': { 'attribute_type': 'attribute_value' },\n # },\n # ]" }, { "urn": "urn:li:dataPlatform:glue", @@ -223,4 +223,4 @@ "docsUrl": "https://datahubproject.io/docs/metadata-ingestion/", "recipe": "source:\n type: \n config:\n # Source-type specifics config\n " } -] \ No newline at end of file +] diff --git a/metadata-ingestion/docs/sources/dynamodb/dynamodb_post.md b/metadata-ingestion/docs/sources/dynamodb/dynamodb_post.md index 7f9a0324c7bc6..a1c0a6e2d4d21 100644 --- a/metadata-ingestion/docs/sources/dynamodb/dynamodb_post.md +++ b/metadata-ingestion/docs/sources/dynamodb/dynamodb_post.md @@ -1,21 +1,18 @@ -## Limitations - -For each region, the list table operation returns maximum number 100 tables, we need to further improve it by implementing pagination for listing tables - ## Advanced Configurations ### Using `include_table_item` config -If there are items that have most representative fields of the table, user could use the `include_table_item` option to provide a list of primary keys of a table in dynamodb format, those items from given primary keys will be included when we scan the table. +If there are items that have most representative fields of the table, users could use the `include_table_item` option to provide a list of primary keys of the table in dynamodb format. We include these items in addition to the first 100 items in the table when we scan it. 
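Because each key must be written in DynamoDB's typed attribute-value format, it can be worth confirming that an entry actually resolves to an item before adding it to the recipe. A rough boto3 sketch (the region, table name, and key values are placeholders):

```python
# Rough sketch: check that a primary key written in DynamoDB's typed format
# matches a real item before listing it under `include_table_item`.
# Region, table name, and key values below are placeholders.
import boto3

client = boto3.client("dynamodb", region_name="us-west-2")

key = {
    "Id": {"S": "example-partition-key-value"},
    "ReplyDateTime": {"S": "2015-09-22T19:58:22.947Z"},
}
response = client.get_item(TableName="Reply", Key=key)
print(response.get("Item"))  # missing/empty if the key does not match an item
```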
-Take [AWS DynamoDB Developer Guide Example tables and data](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/AppendixSampleTables.html) as an example, if user has a table `Reply` with composite primary key `Id` and `ReplyDateTime`, user can use `include_table_item` to include 2 items as following: +Take [AWS DynamoDB Developer Guide Example tables and data](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/AppendixSampleTables.html) as an example, if a account has a table `Reply` in the `us-west-2` region with composite primary key `Id` and `ReplyDateTime`, users can use `include_table_item` to include 2 items as following: Example: ```yml -# put the table name and composite key in DynamoDB format +# The table name should be in the format of region.table_name +# The primary keys should be in the DynamoDB format include_table_item: - Reply: + us-west-2.Reply: [ { "ReplyDateTime": { "S": "2015-09-22T19:58:22.947Z" }, diff --git a/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md b/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md index a48e8d5be04aa..598d0ecdb3786 100644 --- a/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md +++ b/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md @@ -1,8 +1,8 @@ ### Prerequisities -In order to execute this source, you will need to create access key and secret keys that have DynamoDB read access. You can create these policies and attach to your account or can ask your account admin to attach these policies to your account. +In order to execute this source, you need to attach the `AmazonDynamoDBReadOnlyAccess` policy to a user in your AWS account. Then create an API access key and secret for the user. -For access key permissions, you can create a policy with permissions below and attach to your account, you can find more details in [Managing access keys for IAM users](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) +For a user to be able to create API access key, it needs the following access key permissions. Your AWS account admin can create a policy with these permissions and attach to the user, you can find more details in [Managing access keys for IAM users](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) ```json { @@ -22,5 +22,3 @@ For access key permissions, you can create a policy with permissions below and a ] } ``` - -For DynamoDB read access, you can simply attach AWS managed policy `AmazonDynamoDBReadOnlyAccess` to your account, you can find more details in [Attaching a policy to an IAM user group](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_groups_manage_attach-policy.html) diff --git a/metadata-ingestion/docs/sources/dynamodb/dynamodb_recipe.yml b/metadata-ingestion/docs/sources/dynamodb/dynamodb_recipe.yml index bd41637907b5c..4f4edc9a7d496 100644 --- a/metadata-ingestion/docs/sources/dynamodb/dynamodb_recipe.yml +++ b/metadata-ingestion/docs/sources/dynamodb/dynamodb_recipe.yml @@ -4,16 +4,14 @@ source: platform_instance: "AWS_ACCOUNT_ID" aws_access_key_id: "${AWS_ACCESS_KEY_ID}" aws_secret_access_key: "${AWS_SECRET_ACCESS_KEY}" - # User could use the below option to provide a list of primary keys of a table in dynamodb format, - # those items from given primary keys will be included when we scan the table. - # For each table we can retrieve up to 16 MB of data, which can contain as many as 100 items. 
- # We'll enforce the the primary keys list size not to exceed 100 - # The total items we'll try to retrieve in these two scenarios: - # 1. If user don't specify include_table_item: we'll retrieve up to 100 items - # 2. If user specifies include_table_item: we'll retrieve up to 100 items plus user specified items in - # the table, with a total not more than 200 items + # + # If there are items that have most representative fields of the table, users could use the + # `include_table_item` option to provide a list of primary keys of the table in dynamodb format. + # For each `region.table`, the list of primary keys can be at most 100. + # We include these items in addition to the first 100 items in the table when we scan it. + # # include_table_item: - # table_name: + # region.table_name: # [ # { # "partition_key_name": { "attribute_type": "attribute_value" }, diff --git a/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py b/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py index 6b7c118373673..d7f3dfb9279fb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py @@ -1,5 +1,5 @@ import logging -from dataclasses import field +from dataclasses import dataclass, field from typing import Any, Counter, Dict, Iterable, List, Optional, Type, Union import boto3 @@ -79,12 +79,13 @@ class DynamoDBConfig(DatasetSourceConfigMixin, StatefulIngestionConfigBase): table_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="regex patterns for tables to filter in ingestion.", + description="Regex patterns for tables to filter in ingestion. The table name format is 'region.table'", ) # Custom Stateful Ingestion settings stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None +@dataclass class DynamoDBSourceReport(StaleEntityRemovalSourceReport): filtered: List[str] = field(default_factory=list) @@ -175,39 +176,30 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: # traverse databases in sorted order so output is consistent for region in dynamodb_regions: - try: - # create a new dynamodb client for each region, - # it seems for one client we could only list the table of one specific region, - # the list_tables() method don't take any config that related to region - # TODO: list table returns maximum number 100, need to implement pagination here - dynamodb_client = boto3.client( - "dynamodb", - region_name=region, - aws_access_key_id=self.config.aws_access_key_id - if self.config.aws_access_key_id - else None, - aws_secret_access_key=self.config.aws_secret_access_key.get_secret_value() - if self.config.aws_secret_access_key - else None, - ) - table_names: List[str] = dynamodb_client.list_tables()["TableNames"] - except Exception as ex: - # TODO: If regions is config input then this would be self.report.report_warning, - # we can create dynamodb client to take aws region or regions as user input - logger.info(f"exception happen in region {region}, skipping: {ex}") - continue - for table_name in sorted(table_names): - if not self.config.table_pattern.allowed(table_name): + logger.info(f"Processing region {region}") + # create a new dynamodb client for each region, + # it seems for one client we could only list the table of one specific region, + # the list_tables() method don't take any config that related to region + dynamodb_client = boto3.client( + "dynamodb", + region_name=region, + 
aws_access_key_id=self.config.aws_access_key_id, + aws_secret_access_key=self.config.aws_secret_access_key.get_secret_value(), + ) + + for table_name in self._list_tables(dynamodb_client): + dataset_name = f"{region}.{table_name}" + if not self.config.table_pattern.allowed(dataset_name): + logger.debug(f"skipping table: {dataset_name}") + self.report.report_dropped(dataset_name) continue + + logger.debug(f"Processing table: {dataset_name}") table_info = dynamodb_client.describe_table(TableName=table_name)[ "Table" ] account_id = table_info["TableArn"].split(":")[4] - if not self.config.table_pattern.allowed(table_name): - self.report.report_dropped(table_name) - continue platform_instance = self.config.platform_instance or account_id - dataset_name = f"{region}.{table_name}" dataset_urn = make_dataset_urn_with_platform_instance( platform=self.platform, platform_instance=platform_instance, @@ -222,7 +214,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) primary_key_dict = self.extract_primary_key_from_key_schema(table_info) table_schema = self.construct_schema_from_dynamodb( - dynamodb_client, table_name + dynamodb_client, region, table_name ) schema_metadata = self.construct_schema_metadata( table_name, @@ -254,9 +246,25 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: aspect=platform_instance_aspect, ).as_workunit() + def _list_tables( + self, + dynamodb_client: BaseClient, + ) -> Iterable[str]: + # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/paginator/ListTables.html + try: + for page in dynamodb_client.get_paginator("list_tables").paginate(): + table_names = page.get("TableNames") + if table_names: + yield from table_names + except Exception as ex: + # TODO: If regions is config input then this would be self.report.report_warning, + # we can create dynamodb client to take aws region or regions as user input + logger.info(f"Exception happened while listing tables, skipping: {ex}") + def construct_schema_from_dynamodb( self, dynamodb_client: BaseClient, + region: str, table_name: str, ) -> Dict[str, SchemaDescription]: """ @@ -275,7 +283,7 @@ def construct_schema_from_dynamodb( The MaxItems is the total number of items to return, and PageSize is the size of each page, we are assigning same value to these two config. 
If MaxItems is more than PageSize then we expect MaxItems / PageSize pages in response_iterator will return """ - self.include_table_item_to_schema(dynamodb_client, table_name, schema) + self.include_table_item_to_schema(dynamodb_client, region, table_name, schema) response_iterator = paginator.paginate( TableName=table_name, PaginationConfig={ @@ -294,33 +302,38 @@ def construct_schema_from_dynamodb( def include_table_item_to_schema( self, dynamodb_client: Any, + region: str, table_name: str, schema: Dict[str, SchemaDescription], ) -> None: """ - It will look up in the config include_table_item dict to see if the current table name exists as key, + It will look up in the config include_table_item dict to see if "region.table_name" exists as key, if it exists then get the items by primary key from the table and put it to schema """ if self.config.include_table_item is None: return - if table_name not in self.config.include_table_item.keys(): + dataset_name = f"{region}.{table_name}" + if dataset_name not in self.config.include_table_item.keys(): return - primary_key_list = self.config.include_table_item.get(table_name) + primary_key_list = self.config.include_table_item.get(dataset_name) assert isinstance(primary_key_list, List) if len(primary_key_list) > MAX_PRIMARY_KEYS_SIZE: logger.info( - f"the provided primary keys list size exceeded the max size for table {table_name}, we'll only process the first {MAX_PRIMARY_KEYS_SIZE} items" + f"the provided primary keys list size exceeded the max size for table {dataset_name}, we'll only process the first {MAX_PRIMARY_KEYS_SIZE} items" ) primary_key_list = primary_key_list[0:MAX_PRIMARY_KEYS_SIZE] items = [] response = dynamodb_client.batch_get_item( RequestItems={table_name: {"Keys": primary_key_list}} - ).get("Responses", None) + ).get("Responses") if response is None: logger.error( f"failed to retrieve item from table {table_name} by the given key {primary_key_list}" ) return + logger.debug( + f"successfully retrieved {len(primary_key_list)} items based on supplied primary key list" + ) items = response.get(table_name) self.construct_schema_from_items(items, schema) From c9309ff1579e31c79d2d8e764a89f7c5e3ff483c Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Thu, 5 Oct 2023 09:07:12 -0700 Subject: [PATCH 12/98] feat(ci): enable ci to run on PR-s targeting all branches (#8933) --- .github/workflows/airflow-plugin.yml | 2 +- .github/workflows/build-and-test.yml | 11 +++-------- .github/workflows/check-datahub-jars.yml | 9 ++------- .github/workflows/close-stale-issues.yml | 4 +++- .github/workflows/code-checks.yml | 13 ++++--------- .github/workflows/docker-postgres-setup.yml | 3 +-- .github/workflows/docker-unified.yml | 7 +++---- .github/workflows/documentation.yml | 2 +- .github/workflows/lint-actions.yml | 4 +++- .github/workflows/metadata-ingestion.yml | 2 +- .github/workflows/metadata-io.yml | 2 +- .github/workflows/spark-smoke-test.yml | 2 +- 12 files changed, 24 insertions(+), 37 deletions(-) diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index a250bddcc16d1..54042d104d906 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -10,7 +10,7 @@ on: - "metadata-models/**" pull_request: branches: - - master + - "**" paths: - ".github/**" - "metadata-ingestion-modules/airflow-plugin/**" diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 96b9bb2a14933..25f3957e8f086 100644 --- a/.github/workflows/build-and-test.yml 
+++ b/.github/workflows/build-and-test.yml @@ -8,7 +8,7 @@ on: - "**.md" pull_request: branches: - - master + - "**" paths-ignore: - "docs/**" - "**.md" @@ -24,17 +24,12 @@ jobs: strategy: fail-fast: false matrix: - command: - [ + command: [ # metadata-ingestion and airflow-plugin each have dedicated build jobs "except_metadata_ingestion", "frontend" ] - timezone: - [ - "UTC", - "America/New_York", - ] + timezone: ["UTC", "America/New_York"] runs-on: ubuntu-latest timeout-minutes: 60 steps: diff --git a/.github/workflows/check-datahub-jars.yml b/.github/workflows/check-datahub-jars.yml index 841a9ed5f9bc7..9a17a70e7f8d4 100644 --- a/.github/workflows/check-datahub-jars.yml +++ b/.github/workflows/check-datahub-jars.yml @@ -10,7 +10,7 @@ on: - "**.md" pull_request: branches: - - master + - "**" paths-ignore: - "docker/**" - "docs/**" @@ -28,12 +28,7 @@ jobs: max-parallel: 1 fail-fast: false matrix: - command: - [ - "datahub-client", - "datahub-protobuf", - "spark-lineage" - ] + command: ["datahub-client", "datahub-protobuf", "spark-lineage"] runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/close-stale-issues.yml b/.github/workflows/close-stale-issues.yml index a7809087702ac..98e3041f28804 100644 --- a/.github/workflows/close-stale-issues.yml +++ b/.github/workflows/close-stale-issues.yml @@ -18,7 +18,9 @@ jobs: days-before-issue-stale: 30 days-before-issue-close: 30 stale-issue-label: "stale" - stale-issue-message: "This issue is stale because it has been open for 30 days with no activity. If you believe this is still an issue on the latest DataHub release please leave a comment with the version that you tested it with. If this is a question/discussion please head to https://slack.datahubproject.io. For feature requests please use https://feature-requests.datahubproject.io" + stale-issue-message: + "This issue is stale because it has been open for 30 days with no activity. If you believe this is still an issue on the latest DataHub release please leave a comment with the version that you tested it with. If this is a question/discussion please head to https://slack.datahubproject.io.\ + \ For feature requests please use https://feature-requests.datahubproject.io" close-issue-message: "This issue was closed because it has been inactive for 30 days since being marked as stale." 
days-before-pr-stale: -1 days-before-pr-close: -1 diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 6ce19a5b4616e..e12971b8a6208 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -10,7 +10,7 @@ on: - ".github/workflows/code-checks.yml" pull_request: branches: - - master + - "**" paths: - "metadata-io/**" - "datahub-web-react/**" @@ -21,17 +21,12 @@ concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true - jobs: code_check: strategy: fail-fast: false matrix: - command: - [ - "check_event_type.py", - "check_policies.py" - ] + command: ["check_event_type.py", "check_policies.py"] name: run code checks runs-on: ubuntu-latest steps: @@ -43,5 +38,5 @@ jobs: with: python-version: "3.10" - name: run check ${{ matrix.command }} - run: | - python .github/scripts/${{ matrix.command }} \ No newline at end of file + run: |- + python .github/scripts/${{ matrix.command }} diff --git a/.github/workflows/docker-postgres-setup.yml b/.github/workflows/docker-postgres-setup.yml index a5d421d4b7ff5..fda4349f90bf7 100644 --- a/.github/workflows/docker-postgres-setup.yml +++ b/.github/workflows/docker-postgres-setup.yml @@ -8,7 +8,7 @@ on: - ".github/workflows/docker-postgres-setup.yml" pull_request: branches: - - master + - "**" paths: - "docker/postgres-setup/**" - ".github/workflows/docker-postgres-setup.yml" @@ -61,4 +61,3 @@ jobs: context: . file: ./docker/postgres-setup/Dockerfile platforms: linux/amd64,linux/arm64 - diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 2aae6bf51529d..8666a5e2e2171 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -8,7 +8,7 @@ on: - "**.md" pull_request: branches: - - master + - "**" paths-ignore: - "docs/**" - "**.md" @@ -545,7 +545,6 @@ jobs: id: tag run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT - datahub_ingestion_slim_build: name: Build and Push DataHub Ingestion Docker Images runs-on: ubuntu-latest @@ -809,8 +808,8 @@ jobs: DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }} DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_INGESTION_IMAGE }} ACTIONS_VERSION: ${{ needs.datahub_ingestion_slim_build.outputs.tag }} - ACTIONS_EXTRA_PACKAGES: 'acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5' - ACTIONS_CONFIG: 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml' + ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5" + ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml" run: | ./smoke-test/run-quickstart.sh - name: sleep 60s diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 68432a4feb13d..ebe2990f3a3cd 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -3,7 +3,7 @@ name: documentation on: pull_request: branches: - - master + - "**" push: branches: - master diff --git a/.github/workflows/lint-actions.yml b/.github/workflows/lint-actions.yml index b285e46da4857..6f34bf292bf51 100644 --- a/.github/workflows/lint-actions.yml +++ b/.github/workflows/lint-actions.yml @@ -2,8 +2,10 @@ name: Lint actions on: pull_request: paths: - - '.github/workflows/**' + - 
".github/workflows/**" + branches: + - "**" jobs: actionlint: runs-on: ubuntu-latest diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index dea4603868f8e..699ca330ce0ac 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -9,7 +9,7 @@ on: - "metadata-models/**" pull_request: branches: - - master + - "**" paths: - ".github/**" - "metadata-ingestion/**" diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index e37ddd0ce4e86..48f230ce14c8d 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -10,7 +10,7 @@ on: - "metadata-io/**" pull_request: branches: - - master + - "**" paths: - "**/*.gradle" - "li-utils/**" diff --git a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml index b2482602e7548..541b2019b93ef 100644 --- a/.github/workflows/spark-smoke-test.yml +++ b/.github/workflows/spark-smoke-test.yml @@ -12,7 +12,7 @@ on: - ".github/workflows/spark-smoke-test.yml" pull_request: branches: - - master + - "**" paths: - "metadata_models/**" - "metadata-integration/java/datahub-client/**" From 3cede10ab30e22dcad286bd42bcd154732e40942 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 5 Oct 2023 13:29:47 -0400 Subject: [PATCH 13/98] feat(ingest/dbt): support `use_compiled_code` and `test_warnings_are_errors` (#8956) --- .../datahub/configuration/source_common.py | 2 +- ...ation.py => validate_field_deprecation.py} | 14 +++++-- .../ingestion/source/dbt/dbt_common.py | 41 ++++++++++++++----- .../src/datahub/ingestion/source/file.py | 2 +- .../ingestion/source/powerbi/config.py | 2 +- .../ingestion/source/redshift/config.py | 2 +- .../src/datahub/ingestion/source/s3/config.py | 2 +- .../ingestion/source/sql/clickhouse.py | 2 +- .../ingestion/source/sql/sql_config.py | 2 +- .../src/datahub/ingestion/source/tableau.py | 2 +- .../tests/unit/test_pydantic_validators.py | 2 +- 11 files changed, 51 insertions(+), 22 deletions(-) rename metadata-ingestion/src/datahub/configuration/{pydantic_field_deprecation.py => validate_field_deprecation.py} (74%) diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index 37b93f3e598e1..a9f891ddb7b1e 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -4,7 +4,7 @@ from pydantic.fields import Field from datahub.configuration.common import ConfigModel, ConfigurationError -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.metadata.schema_classes import FabricTypeClass DEFAULT_ENV = FabricTypeClass.PROD diff --git a/metadata-ingestion/src/datahub/configuration/pydantic_field_deprecation.py b/metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py similarity index 74% rename from metadata-ingestion/src/datahub/configuration/pydantic_field_deprecation.py rename to metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py index ed82acb594ed7..6134c4dab4817 100644 --- a/metadata-ingestion/src/datahub/configuration/pydantic_field_deprecation.py +++ b/metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py @@ -1,20 +1,28 @@ import warnings -from typing import Optional, Type +from typing import Any, Optional, Type 
import pydantic from datahub.configuration.common import ConfigurationWarning from datahub.utilities.global_warning_util import add_global_warning +_unset = object() -def pydantic_field_deprecated(field: str, message: Optional[str] = None) -> classmethod: + +def pydantic_field_deprecated( + field: str, + warn_if_value_is_not: Any = _unset, + message: Optional[str] = None, +) -> classmethod: if message: output = message else: output = f"{field} is deprecated and will be removed in a future release. Please remove it from your config." def _validate_deprecated(cls: Type, values: dict) -> dict: - if field in values: + if field in values and ( + warn_if_value_is_not is _unset or values[field] != warn_if_value_is_not + ): add_global_warning(output) warnings.warn(output, ConfigurationWarning, stacklevel=2) return values diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index f9b71892975b4..0f5c08eb6ac54 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -18,8 +18,8 @@ ConfigurationError, LineageConfig, ) -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.emitter import mce_builder from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext @@ -214,7 +214,9 @@ class DBTCommonConfig( default=False, description="Use model identifier instead of model name if defined (if not, default to model name).", ) - _deprecate_use_identifiers = pydantic_field_deprecated("use_identifiers") + _deprecate_use_identifiers = pydantic_field_deprecated( + "use_identifiers", warn_if_value_is_not=False + ) entities_enabled: DBTEntitiesEnabled = Field( DBTEntitiesEnabled(), @@ -278,6 +280,14 @@ class DBTCommonConfig( description="When enabled, converts column URNs to lowercase to ensure cross-platform compatibility. 
" "If `target_platform` is Snowflake, the default is True.", ) + use_compiled_code: bool = Field( + default=False, + description="When enabled, uses the compiled dbt code instead of the raw dbt node definition.", + ) + test_warnings_are_errors: bool = Field( + default=False, + description="When enabled, dbt test warnings will be treated as failures.", + ) @validator("target_platform") def validate_target_platform_value(cls, target_platform: str) -> str: @@ -811,7 +821,7 @@ def _make_assertion_from_test( mce_builder.make_schema_field_urn(upstream_urn, column_name) ], nativeType=node.name, - logic=node.compiled_code if node.compiled_code else node.raw_code, + logic=node.compiled_code or node.raw_code, aggregation=AssertionStdAggregationClass._NATIVE_, nativeParameters=string_map(kw_args), ), @@ -825,7 +835,7 @@ def _make_assertion_from_test( dataset=upstream_urn, scope=DatasetAssertionScopeClass.DATASET_ROWS, operator=AssertionStdOperatorClass._NATIVE_, - logic=node.compiled_code if node.compiled_code else node.raw_code, + logic=node.compiled_code or node.raw_code, nativeType=node.name, aggregation=AssertionStdAggregationClass._NATIVE_, nativeParameters=string_map(kw_args), @@ -856,6 +866,10 @@ def _make_assertion_result_from_test( result=AssertionResultClass( type=AssertionResultTypeClass.SUCCESS if test_result.status == "pass" + or ( + not self.config.test_warnings_are_errors + and test_result.status == "warn" + ) else AssertionResultTypeClass.FAILURE, nativeResults=test_result.native_results, ), @@ -1007,8 +1021,8 @@ def create_platform_mces( aspects.append(upstream_lineage_class) # add view properties aspect - if node.raw_code and node.language == "sql": - view_prop_aspect = self._create_view_properties_aspect(node) + view_prop_aspect = self._create_view_properties_aspect(node) + if view_prop_aspect: aspects.append(view_prop_aspect) # emit subtype mcp @@ -1133,14 +1147,21 @@ def _create_dataset_properties_aspect( def get_external_url(self, node: DBTNode) -> Optional[str]: pass - def _create_view_properties_aspect(self, node: DBTNode) -> ViewPropertiesClass: + def _create_view_properties_aspect( + self, node: DBTNode + ) -> Optional[ViewPropertiesClass]: + view_logic = ( + node.compiled_code if self.config.use_compiled_code else node.raw_code + ) + + if node.language != "sql" or not view_logic: + return None + materialized = node.materialization in {"table", "incremental", "snapshot"} - # this function is only called when raw sql is present. 
assert is added to satisfy lint checks - assert node.raw_code is not None view_properties = ViewPropertiesClass( materialized=materialized, viewLanguage="SQL", - viewLogic=node.raw_code, + viewLogic=view_logic, ) return view_properties diff --git a/metadata-ingestion/src/datahub/ingestion/source/file.py b/metadata-ingestion/src/datahub/ingestion/source/file.py index de61fa8481c58..590aa59f7b5b6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/file.py +++ b/metadata-ingestion/src/datahub/ingestion/source/file.py @@ -16,7 +16,7 @@ from pydantic.fields import Field from datahub.configuration.common import ConfigEnum, ConfigModel, ConfigurationError -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index a8c7e48f3785c..96729f4c60c6c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -9,8 +9,8 @@ import datahub.emitter.mce_builder as builder from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.common.subtypes import BIAssetSubTypes from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalSourceReport, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index 93850607e551e..804a14b0fe1cf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -7,8 +7,8 @@ from datahub.configuration import ConfigModel from datahub.configuration.common import AllowDenyPattern -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetLineageProviderConfigBase +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.data_lake_common.path_spec import PathSpec from datahub.ingestion.source.sql.postgres import BasePostgresConfig from datahub.ingestion.source.state.stateful_ingestion_base import ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py index f1dd622efb746..9b5296f0b9dd5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py @@ -5,8 +5,8 @@ from pydantic.fields import Field from datahub.configuration.common import AllowDenyPattern -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from 
datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig from datahub.ingestion.source.data_lake_common.config import PathSpecsConfigMixin diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index 1626f86b92545..8873038079bad 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -19,9 +19,9 @@ from sqlalchemy.types import BOOLEAN, DATE, DATETIME, INTEGER import datahub.emitter.mce_builder as builder -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetLineageProviderConfigBase from datahub.configuration.time_window_config import BaseTimeWindowConfig +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.emitter import mce_builder from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.decorators import ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py index 8f1e04b915f3b..677d32c8bac08 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py @@ -7,8 +7,8 @@ from pydantic import Field from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig from datahub.ingestion.source.state.stale_entity_removal_handler import ( StatefulStaleMetadataRemovalConfig, diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 6214cba342622..e347cd26d245a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -37,11 +37,11 @@ ConfigModel, ConfigurationError, ) -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import ( DatasetLineageProviderConfigBase, DatasetSourceConfigMixin, ) +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( ContainerKey, diff --git a/metadata-ingestion/tests/unit/test_pydantic_validators.py b/metadata-ingestion/tests/unit/test_pydantic_validators.py index 07d86043a35bf..3e9ec6cbaf357 100644 --- a/metadata-ingestion/tests/unit/test_pydantic_validators.py +++ b/metadata-ingestion/tests/unit/test_pydantic_validators.py @@ -4,7 +4,7 @@ from pydantic import ValidationError from datahub.configuration.common import ConfigModel -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.configuration.validate_field_removal import pydantic_removed_field from datahub.configuration.validate_field_rename import 
pydantic_renamed_field from datahub.utilities.global_warning_util import get_global_warnings From debac3cf5c31b471a5a82da8d18fb8303cc8b9d0 Mon Sep 17 00:00:00 2001 From: Patrick Franco Braz Date: Thu, 5 Oct 2023 17:47:10 -0300 Subject: [PATCH 14/98] refactor(boot): increases wait timeout for servlets initialization (#8947) Co-authored-by: RyanHolstien --- .../configuration/src/main/resources/application.yml | 3 +++ .../metadata/boot/OnBootApplicationListener.java | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index 4be31b2b6bb15..4dfd96ac75c6c 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -276,6 +276,9 @@ bootstrap: enabled: ${UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED:false} # enable to run the upgrade to migrate legacy default browse paths to new ones backfillBrowsePathsV2: enabled: ${BACKFILL_BROWSE_PATHS_V2:false} # Enables running the backfill of browsePathsV2 upgrade step. There are concerns about the load of this step so hiding it behind a flag. Deprecating in favor of running through SystemUpdate + servlets: + waitTimeout: ${BOOTSTRAP_SERVLETS_WAITTIMEOUT:60} # Total waiting time in seconds for servlets to initialize + systemUpdate: initialBackOffMs: ${BOOTSTRAP_SYSTEM_UPDATE_INITIAL_BACK_OFF_MILLIS:5000} diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/OnBootApplicationListener.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/OnBootApplicationListener.java index 980cafaceae27..032b934a7ba87 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/OnBootApplicationListener.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/OnBootApplicationListener.java @@ -15,15 +15,18 @@ import org.apache.http.impl.client.HttpClients; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.event.ContextRefreshedEvent; import org.springframework.context.event.EventListener; import org.springframework.stereotype.Component; import org.springframework.web.context.WebApplicationContext; +import org.springframework.context.annotation.Configuration; /** * Responsible for coordinating starting steps that happen before the application starts up. 
*/ +@Configuration @Slf4j @Component public class OnBootApplicationListener { @@ -44,6 +47,8 @@ public class OnBootApplicationListener { @Qualifier("configurationProvider") private ConfigurationProvider provider; + @Value("${bootstrap.servlets.waitTimeout}") + private int _servletsWaitTimeout; @EventListener(ContextRefreshedEvent.class) public void onApplicationEvent(@Nonnull ContextRefreshedEvent event) { @@ -62,7 +67,7 @@ public void onApplicationEvent(@Nonnull ContextRefreshedEvent event) { public Runnable isSchemaRegistryAPIServletReady() { return () -> { final HttpGet request = new HttpGet(provider.getKafka().getSchemaRegistry().getUrl()); - int timeouts = 30; + int timeouts = _servletsWaitTimeout; boolean openAPIServeletReady = false; while (!openAPIServeletReady && timeouts > 0) { try { @@ -79,7 +84,7 @@ public Runnable isSchemaRegistryAPIServletReady() { timeouts--; } if (!openAPIServeletReady) { - log.error("Failed to bootstrap DataHub, OpenAPI servlet was not ready after 30 seconds"); + log.error("Failed to bootstrap DataHub, OpenAPI servlet was not ready after {} seconds", timeouts); System.exit(1); } else { _bootstrapManager.start(); From 26bc039b967d3a62a7079522b702e97ed8ad8d27 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Thu, 5 Oct 2023 23:23:15 -0400 Subject: [PATCH 15/98] fix(ingest/unity): Remove metastore from ingestion and urns; standardize platform instance; add notebook filter (#8943) --- docs/how/updating-datahub.md | 5 + .../src/datahub/emitter/mcp_builder.py | 10 +- .../datahub/ingestion/source/unity/config.py | 45 ++++++++- .../datahub/ingestion/source/unity/proxy.py | 16 +-- .../ingestion/source/unity/proxy_types.py | 19 ++-- .../datahub/ingestion/source/unity/source.py | 99 ++++++++++++------- 6 files changed, 145 insertions(+), 49 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 4df8d435cf1c4..5d0ad5eaf8f7e 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -9,6 +9,11 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. - #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details. - #8853 - Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`. +- #8943 The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. +This is currently enabled by default to preserve compatibility, but will be disabled by default and then removed in the future. +If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup. +Otherwise, we recommend soft deleting all databricks data via the DataHub CLI: +`datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`. 
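
To make the urn impact concrete, here is a minimal sketch (not part of this patch) built on the container keys this change introduces in `mcp_builder.py`; it assumes `ContainerKey` subclasses expose the `guid()` helper that feeds container urns, and the metastore/catalog names are placeholders:

```python
from datahub.emitter.mcp_builder import CatalogKey, CatalogKeyWithMetastore

# The same Unity catalog keyed with and without the metastore component.
# "example-metastore" and "main" are illustrative names, not real assets.
old_key = CatalogKeyWithMetastore(
    platform="databricks",
    instance=None,
    metastore="example-metastore",
    catalog="main",
)
new_key = CatalogKey(platform="databricks", instance=None, catalog="main")

# The extra metastore field changes the key's stable hash, so the container urn
# (and every dataset urn nested under it) changes when include_metastore flips.
assert old_key.guid() != new_key.guid()
```

This is why re-ingesting with `include_metastore: false` needs the cleanup step described above, either through stateful ingestion or the soft-delete command.
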
### Potential Downtime diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 06f689dfd317b..65e0c0d6ba60d 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -94,7 +94,15 @@ class MetastoreKey(ContainerKey): metastore: str -class CatalogKey(MetastoreKey): +class CatalogKeyWithMetastore(MetastoreKey): + catalog: str + + +class UnitySchemaKeyWithMetastore(CatalogKeyWithMetastore): + unity_schema: str + + +class CatalogKey(ContainerKey): catalog: str diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index a49c789a82f27..f259fa260f653 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -1,3 +1,4 @@ +import logging import os from datetime import datetime, timedelta, timezone from typing import Any, Dict, Optional @@ -21,6 +22,9 @@ OperationConfig, is_profiling_enabled, ) +from datahub.utilities.global_warning_util import add_global_warning + +logger = logging.getLogger(__name__) class UnityCatalogProfilerConfig(ConfigModel): @@ -97,9 +101,25 @@ class UnityCatalogSourceConfig( description="Name of the workspace. Default to deployment name present in workspace_url", ) + include_metastore: bool = pydantic.Field( + default=True, + description=( + "Whether to ingest the workspace's metastore as a container and include it in all urns." + " Changing this will affect the urns of all entities in the workspace." + " This will be disabled by default in the future," + " so it is recommended to set this to `False` for new ingestions." + " If you have an existing unity catalog ingestion, you'll want to avoid duplicates by soft deleting existing data." + " If stateful ingestion is enabled, running with `include_metastore: false` should be sufficient." + " Otherwise, we recommend deleting via the cli: `datahub delete --platform databricks` and re-ingesting with `include_metastore: false`." + ), + ) + ingest_data_platform_instance_aspect: Optional[bool] = pydantic.Field( default=False, - description="Option to enable/disable ingestion of the data platform instance aspect. The default data platform instance id for a dataset is workspace_name", + description=( + "Option to enable/disable ingestion of the data platform instance aspect." + " The default data platform instance id for a dataset is workspace_name" + ), ) _only_ingest_assigned_metastore_removed = pydantic_removed_field( @@ -122,6 +142,16 @@ class UnityCatalogSourceConfig( default=AllowDenyPattern.allow_all(), description="Regex patterns for tables to filter in ingestion. Specify regex to match the entire table name in `catalog.schema.table` format. e.g. to match all tables starting with customer in Customer catalog and public schema, use the regex `Customer\\.public\\.customer.*`.", ) + + notebook_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description=( + "Regex patterns for notebooks to filter in ingestion, based on notebook *path*." + " Specify regex to match the entire notebook path in `//.../` format." + " e.g. to match all notebooks in the root Shared directory, use the regex `/Shared/.*`." + ), + ) + domain: Dict[str, AllowDenyPattern] = Field( default=dict(), description='Attach domains to catalogs, schemas or tables during ingestion using regex patterns. 
Domain key can be a guid like *urn:li:domain:ec428203-ce86-4db3-985d-5a8ee6df32ba* or a string like "Marketing".) If you provide strings, then datahub will attempt to resolve this name to a guid, and will error out if this fails. There can be multiple domain keys specified.', @@ -182,3 +212,16 @@ def workspace_url_should_start_with_http_scheme(cls, workspace_url: str) -> str: "Workspace URL must start with http scheme. e.g. https://my-workspace.cloud.databricks.com" ) return workspace_url + + @pydantic.validator("include_metastore") + def include_metastore_warning(cls, v: bool) -> bool: + if v: + msg = ( + "`include_metastore` is enabled." + " This is not recommended and will be disabled by default in the future, which is a breaking change." + " All databricks urns will change if you re-ingest with this disabled." + " We recommend soft deleting all databricks data and re-ingesting with `include_metastore` set to `False`." + ) + logger.warning(msg) + add_global_warning(msg) + return v diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index 2401f1c3d163c..529d9e7b563a5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -97,14 +97,13 @@ def __init__( self.report = report def check_basic_connectivity(self) -> bool: - self._workspace_client.metastores.summary() - return True + return bool(self._workspace_client.catalogs.list()) def assigned_metastore(self) -> Metastore: response = self._workspace_client.metastores.summary() return self._create_metastore(response) - def catalogs(self, metastore: Metastore) -> Iterable[Catalog]: + def catalogs(self, metastore: Optional[Metastore]) -> Iterable[Catalog]: response = self._workspace_client.catalogs.list() if not response: logger.info("Catalogs not found") @@ -247,7 +246,7 @@ def table_lineage( for item in response.get("upstreams") or []: if "tableInfo" in item: table_ref = TableReference.create_from_lineage( - item["tableInfo"], table.schema.catalog.metastore.id + item["tableInfo"], table.schema.catalog.metastore ) if table_ref: table.upstreams[table_ref] = {} @@ -276,7 +275,7 @@ def get_column_lineage(self, table: Table, include_entity_lineage: bool) -> None ) for item in response.get("upstream_cols", []): table_ref = TableReference.create_from_lineage( - item, table.schema.catalog.metastore.id + item, table.schema.catalog.metastore ) if table_ref: table.upstreams.setdefault(table_ref, {}).setdefault( @@ -305,10 +304,13 @@ def _create_metastore( comment=None, ) - def _create_catalog(self, metastore: Metastore, obj: CatalogInfo) -> Catalog: + def _create_catalog( + self, metastore: Optional[Metastore], obj: CatalogInfo + ) -> Catalog: + catalog_name = self._escape_sequence(obj.name) return Catalog( name=obj.name, - id=f"{metastore.id}.{self._escape_sequence(obj.name)}", + id=f"{metastore.id}.{catalog_name}" if metastore else catalog_name, metastore=metastore, comment=obj.comment, owner=obj.owner, diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py index 54ac2e90d7c7e..18ac2475b51e0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py @@ -92,7 +92,7 @@ class Metastore(CommonProperty): @dataclass class Catalog(CommonProperty): - metastore: Metastore + metastore: 
Optional[Metastore] owner: Optional[str] type: CatalogType @@ -130,7 +130,7 @@ class ServicePrincipal: @dataclass(frozen=True, order=True) class TableReference: - metastore: str + metastore: Optional[str] catalog: str schema: str table: str @@ -138,17 +138,21 @@ class TableReference: @classmethod def create(cls, table: "Table") -> "TableReference": return cls( - table.schema.catalog.metastore.id, + table.schema.catalog.metastore.id + if table.schema.catalog.metastore + else None, table.schema.catalog.name, table.schema.name, table.name, ) @classmethod - def create_from_lineage(cls, d: dict, metastore: str) -> Optional["TableReference"]: + def create_from_lineage( + cls, d: dict, metastore: Optional[Metastore] + ) -> Optional["TableReference"]: try: return cls( - metastore, + metastore.id if metastore else None, d["catalog_name"], d["schema_name"], d.get("table_name", d["name"]), # column vs table query output @@ -158,7 +162,10 @@ def create_from_lineage(cls, d: dict, metastore: str) -> Optional["TableReferenc return None def __str__(self) -> str: - return f"{self.metastore}.{self.catalog}.{self.schema}.{self.table}" + if self.metastore: + return f"{self.metastore}.{self.catalog}.{self.schema}.{self.table}" + else: + return self.qualified_table_name @property def qualified_table_name(self) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index f2da1aece9fd4..4f7866aee7681 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -16,10 +16,12 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( CatalogKey, + CatalogKeyWithMetastore, ContainerKey, MetastoreKey, NotebookKey, UnitySchemaKey, + UnitySchemaKeyWithMetastore, add_dataset_to_container, gen_containers, ) @@ -127,7 +129,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource): config: UnityCatalogSourceConfig unity_catalog_api_proxy: UnityCatalogApiProxy platform: str = "databricks" - platform_instance_name: str + platform_instance_name: Optional[str] def get_report(self) -> UnityCatalogReport: return self.report @@ -146,11 +148,13 @@ def __init__(self, ctx: PipelineContext, config: UnityCatalogSourceConfig): self.external_url_base = urljoin(self.config.workspace_url, "/explore/data") # Determine the platform_instance_name - self.platform_instance_name = ( - config.workspace_name - if config.workspace_name is not None - else config.workspace_url.split("//")[1].split(".")[0] - ) + self.platform_instance_name = self.config.platform_instance + if self.config.include_metastore: + self.platform_instance_name = ( + config.workspace_name + if config.workspace_name is not None + else config.workspace_url.split("//")[1].split(".")[0] + ) if self.config.domain: self.domain_registry = DomainRegistry( @@ -247,10 +251,14 @@ def build_service_principal_map(self) -> None: def process_notebooks(self) -> Iterable[MetadataWorkUnit]: for notebook in self.unity_catalog_api_proxy.workspace_notebooks(): + if not self.config.notebook_pattern.allowed(notebook.path): + self.report.notebooks.dropped(notebook.path) + continue + self.notebooks[str(notebook.id)] = notebook - yield from self._gen_notebook_aspects(notebook) + yield from self._gen_notebook_workunits(notebook) - def _gen_notebook_aspects(self, notebook: Notebook) -> Iterable[MetadataWorkUnit]: + def _gen_notebook_workunits(self, notebook: 
Notebook) -> Iterable[MetadataWorkUnit]: mcps = MetadataChangeProposalWrapper.construct_many( entityUrn=self.gen_notebook_urn(notebook), aspects=[ @@ -270,7 +278,7 @@ def _gen_notebook_aspects(self, notebook: Notebook) -> Iterable[MetadataWorkUnit ), SubTypesClass(typeNames=[DatasetSubTypes.NOTEBOOK]), BrowsePathsClass(paths=notebook.path.split("/")), - # TODO: Add DPI aspect + self._create_data_platform_instance_aspect(), ], ) for mcp in mcps: @@ -296,13 +304,17 @@ def _gen_notebook_lineage(self, notebook: Notebook) -> Optional[MetadataWorkUnit ).as_workunit() def process_metastores(self) -> Iterable[MetadataWorkUnit]: - metastore = self.unity_catalog_api_proxy.assigned_metastore() - yield from self.gen_metastore_containers(metastore) + metastore: Optional[Metastore] = None + if self.config.include_metastore: + metastore = self.unity_catalog_api_proxy.assigned_metastore() + yield from self.gen_metastore_containers(metastore) yield from self.process_catalogs(metastore) + if metastore and self.config.include_metastore: + self.report.metastores.processed(metastore.id) - self.report.metastores.processed(metastore.id) - - def process_catalogs(self, metastore: Metastore) -> Iterable[MetadataWorkUnit]: + def process_catalogs( + self, metastore: Optional[Metastore] + ) -> Iterable[MetadataWorkUnit]: for catalog in self.unity_catalog_api_proxy.catalogs(metastore=metastore): if not self.config.catalog_pattern.allowed(catalog.id): self.report.catalogs.dropped(catalog.id) @@ -353,7 +365,7 @@ def process_table(self, table: Table, schema: Schema) -> Iterable[MetadataWorkUn operation = self._create_table_operation_aspect(table) domain = self._get_domain_aspect(dataset_name=table.ref.qualified_table_name) ownership = self._create_table_ownership_aspect(table) - data_platform_instance = self._create_data_platform_instance_aspect(table) + data_platform_instance = self._create_data_platform_instance_aspect() if self.config.include_column_lineage: self.unity_catalog_api_proxy.get_column_lineage( @@ -503,27 +515,37 @@ def gen_metastore_containers( def gen_catalog_containers(self, catalog: Catalog) -> Iterable[MetadataWorkUnit]: domain_urn = self._gen_domain_urn(catalog.name) - metastore_container_key = self.gen_metastore_key(catalog.metastore) catalog_container_key = self.gen_catalog_key(catalog) yield from gen_containers( container_key=catalog_container_key, name=catalog.name, sub_types=[DatasetContainerSubTypes.CATALOG], domain_urn=domain_urn, - parent_container_key=metastore_container_key, + parent_container_key=self.gen_metastore_key(catalog.metastore) + if self.config.include_metastore and catalog.metastore + else None, description=catalog.comment, owner_urn=self.get_owner_urn(catalog.owner), external_url=f"{self.external_url_base}/{catalog.name}", ) def gen_schema_key(self, schema: Schema) -> ContainerKey: - return UnitySchemaKey( - unity_schema=schema.name, - platform=self.platform, - instance=self.config.platform_instance, - catalog=schema.catalog.name, - metastore=schema.catalog.metastore.name, - ) + if self.config.include_metastore: + assert schema.catalog.metastore + return UnitySchemaKeyWithMetastore( + unity_schema=schema.name, + platform=self.platform, + instance=self.config.platform_instance, + catalog=schema.catalog.name, + metastore=schema.catalog.metastore.name, + ) + else: + return UnitySchemaKey( + unity_schema=schema.name, + platform=self.platform, + instance=self.config.platform_instance, + catalog=schema.catalog.name, + ) def gen_metastore_key(self, metastore: Metastore) -> 
MetastoreKey: return MetastoreKey( @@ -532,13 +554,21 @@ def gen_metastore_key(self, metastore: Metastore) -> MetastoreKey: instance=self.config.platform_instance, ) - def gen_catalog_key(self, catalog: Catalog) -> CatalogKey: - return CatalogKey( - catalog=catalog.name, - metastore=catalog.metastore.name, - platform=self.platform, - instance=self.config.platform_instance, - ) + def gen_catalog_key(self, catalog: Catalog) -> ContainerKey: + if self.config.include_metastore: + assert catalog.metastore + return CatalogKeyWithMetastore( + catalog=catalog.name, + metastore=catalog.metastore.name, + platform=self.platform, + instance=self.config.platform_instance, + ) + else: + return CatalogKey( + catalog=catalog.name, + platform=self.platform, + instance=self.config.platform_instance, + ) def _gen_domain_urn(self, dataset_name: str) -> Optional[str]: domain_urn: Optional[str] = None @@ -643,15 +673,16 @@ def _create_table_ownership_aspect(self, table: Table) -> Optional[OwnershipClas return None def _create_data_platform_instance_aspect( - self, table: Table + self, ) -> Optional[DataPlatformInstanceClass]: - # Only ingest the DPI aspect if the flag is true if self.config.ingest_data_platform_instance_aspect: return DataPlatformInstanceClass( platform=make_data_platform_urn(self.platform), instance=make_dataplatform_instance_urn( self.platform, self.platform_instance_name - ), + ) + if self.platform_instance_name + else None, ) return None From ea87febd2bdf0aebf603532be9448e6435f1fea9 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Fri, 6 Oct 2023 14:36:32 +0900 Subject: [PATCH 16/98] fix: add retry for fetch_url (#8958) --- docs-website/download_historical_versions.py | 34 ++++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/docs-website/download_historical_versions.py b/docs-website/download_historical_versions.py index 83157edc1972c..53ee9cf1e63ef 100644 --- a/docs-website/download_historical_versions.py +++ b/docs-website/download_historical_versions.py @@ -1,6 +1,7 @@ import json import os import tarfile +import time import urllib.request repo_url = "https://api.github.com/repos/datahub-project/static-assets" @@ -16,17 +17,30 @@ def download_file(url, destination): f.write(chunk) -def fetch_urls(repo_url: str, folder_path: str, file_format: str): +def fetch_urls( + repo_url: str, folder_path: str, file_format: str, max_retries=3, retry_delay=5 +): api_url = f"{repo_url}/contents/{folder_path}" - response = urllib.request.urlopen(api_url) - data = response.read().decode("utf-8") - urls = [ - file["download_url"] - for file in json.loads(data) - if file["name"].endswith(file_format) - ] - print(urls) - return urls + for attempt in range(max_retries + 1): + try: + response = urllib.request.urlopen(api_url) + if response.status == 403 or (500 <= response.status < 600): + raise Exception(f"HTTP Error {response.status}: {response.reason}") + data = response.read().decode("utf-8") + urls = [ + file["download_url"] + for file in json.loads(data) + if file["name"].endswith(file_format) + ] + print(urls) + return urls + except Exception as e: + if attempt < max_retries: + print(f"Attempt {attempt + 1}/{max_retries}: {e}") + time.sleep(retry_delay) + else: + print(f"Max retries reached. 
Unable to fetch data.") + raise def extract_tar_file(destination_path): From c80da8f949aea340af73c992ff6d2bd129eb55fe Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Fri, 6 Oct 2023 10:06:36 -0400 Subject: [PATCH 17/98] feat(ingest/unity): Use ThreadPoolExecutor for CLL (#8952) --- .../datahub/ingestion/source/unity/config.py | 11 +++++ .../datahub/ingestion/source/unity/proxy.py | 46 ++++++++----------- .../datahub/ingestion/source/unity/report.py | 2 + .../datahub/ingestion/source/unity/source.py | 33 +++++++++---- 4 files changed, 57 insertions(+), 35 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index f259fa260f653..51390873712d3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -181,6 +181,17 @@ class UnityCatalogSourceConfig( description="Option to enable/disable lineage generation. Currently we have to call a rest call per column to get column level lineage due to the Databrick api which can slow down ingestion. ", ) + column_lineage_column_limit: int = pydantic.Field( + default=300, + description="Limit the number of columns to get column level lineage. ", + ) + + lineage_max_workers: int = pydantic.Field( + default=5 * (os.cpu_count() or 4), + description="Number of worker threads to use for column lineage thread pool executor. Set to 1 to disable.", + hidden_from_docs=True, + ) + include_usage_statistics: bool = Field( default=True, description="Generate usage statistics.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index 529d9e7b563a5..9bcdb200f180e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -233,9 +233,7 @@ def list_lineages_by_column(self, table_name: str, column_name: str) -> dict: body={"table_name": table_name, "column_name": column_name}, ) - def table_lineage( - self, table: Table, include_entity_lineage: bool - ) -> Optional[dict]: + def table_lineage(self, table: Table, include_entity_lineage: bool) -> None: # Lineage endpoint doesn't exists on 2.1 version try: response: dict = self.list_lineages_by_table( @@ -256,34 +254,30 @@ def table_lineage( for item in response.get("downstreams") or []: for notebook in item.get("notebookInfos") or []: table.downstream_notebooks.add(notebook["notebook_id"]) - - return response except Exception as e: - logger.error(f"Error getting lineage: {e}") - return None + logger.warning( + f"Error getting lineage on table {table.ref}: {e}", exc_info=True + ) - def get_column_lineage(self, table: Table, include_entity_lineage: bool) -> None: + def get_column_lineage(self, table: Table, column_name: str) -> None: try: - table_lineage = self.table_lineage( - table, include_entity_lineage=include_entity_lineage + response: dict = self.list_lineages_by_column( + table_name=table.ref.qualified_table_name, + column_name=column_name, ) - if table_lineage: - for column in table.columns: - response: dict = self.list_lineages_by_column( - table_name=table.ref.qualified_table_name, - column_name=column.name, - ) - for item in response.get("upstream_cols", []): - table_ref = TableReference.create_from_lineage( - item, table.schema.catalog.metastore - ) - if table_ref: - table.upstreams.setdefault(table_ref, {}).setdefault( - column.name, [] - 
).append(item["name"]) - + for item in response.get("upstream_cols") or []: + table_ref = TableReference.create_from_lineage( + item, table.schema.catalog.metastore + ) + if table_ref: + table.upstreams.setdefault(table_ref, {}).setdefault( + column_name, [] + ).append(item["name"]) except Exception as e: - logger.error(f"Error getting lineage: {e}") + logger.warning( + f"Error getting column lineage on table {table.ref}, column {column_name}: {e}", + exc_info=True, + ) @staticmethod def _escape_sequence(value: str) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py index 808172a136bb3..fa61571fa92cb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py @@ -18,6 +18,8 @@ class UnityCatalogReport(IngestionStageReport, StaleEntityRemovalSourceReport): table_profiles: EntityFilterReport = EntityFilterReport.field(type="table profile") notebooks: EntityFilterReport = EntityFilterReport.field(type="notebook") + num_column_lineage_skipped_column_count: int = 0 + num_queries: int = 0 num_queries_dropped_parse_failure: int = 0 num_queries_missing_table: int = 0 # Can be due to pattern filter diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 4f7866aee7681..27c1f341aa84d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -1,6 +1,7 @@ import logging import re import time +from concurrent.futures import ThreadPoolExecutor from datetime import timedelta from typing import Dict, Iterable, List, Optional, Set, Union from urllib.parse import urljoin @@ -367,15 +368,7 @@ def process_table(self, table: Table, schema: Schema) -> Iterable[MetadataWorkUn ownership = self._create_table_ownership_aspect(table) data_platform_instance = self._create_data_platform_instance_aspect() - if self.config.include_column_lineage: - self.unity_catalog_api_proxy.get_column_lineage( - table, include_entity_lineage=self.config.include_notebooks - ) - elif self.config.include_table_lineage: - self.unity_catalog_api_proxy.table_lineage( - table, include_entity_lineage=self.config.include_notebooks - ) - lineage = self._generate_lineage_aspect(dataset_urn, table) + lineage = self.ingest_lineage(table) if self.config.include_notebooks: for notebook_id in table.downstream_notebooks: @@ -401,6 +394,28 @@ def process_table(self, table: Table, schema: Schema) -> Iterable[MetadataWorkUn ) ] + def ingest_lineage(self, table: Table) -> Optional[UpstreamLineageClass]: + if self.config.include_table_lineage: + self.unity_catalog_api_proxy.table_lineage( + table, include_entity_lineage=self.config.include_notebooks + ) + + if self.config.include_column_lineage and table.upstreams: + if len(table.columns) > self.config.column_lineage_column_limit: + self.report.num_column_lineage_skipped_column_count += 1 + + with ThreadPoolExecutor( + max_workers=self.config.lineage_max_workers + ) as executor: + for column in table.columns[: self.config.column_lineage_column_limit]: + executor.submit( + self.unity_catalog_api_proxy.get_column_lineage, + table, + column.name, + ) + + return self._generate_lineage_aspect(self.gen_dataset_urn(table.ref), table) + def _generate_lineage_aspect( self, dataset_urn: str, table: Table ) -> Optional[UpstreamLineageClass]: From 
8e7f286e71b36a07b4fedc0de1807354064a4fa5 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Fri, 6 Oct 2023 20:12:39 +0530 Subject: [PATCH 18/98] feat(ingest/snowflake): support profiling with sampling (#8902) Co-authored-by: Andrew Sikowitz --- .../ingestion/source/bigquery_v2/profiler.py | 127 ++++++---------- .../ingestion/source/ge_data_profiler.py | 32 +++-- .../ingestion/source/ge_profiling_config.py | 4 +- .../ingestion/source/redshift/profile.py | 93 ++---------- .../source/snowflake/snowflake_profiler.py | 135 +++++------------- .../source/sql/sql_generic_profiler.py | 105 +++++++++++++- 6 files changed, 209 insertions(+), 287 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py index b3e88459917b3..8ae17600e0eea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py @@ -1,12 +1,9 @@ -import dataclasses import logging from datetime import datetime from typing import Dict, Iterable, List, Optional, Tuple, cast from dateutil.relativedelta import relativedelta -from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance -from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config @@ -15,7 +12,7 @@ RANGE_PARTITION_NAME, BigqueryTable, ) -from datahub.ingestion.source.ge_data_profiler import GEProfilerRequest +from datahub.ingestion.source.sql.sql_generic import BaseTable from datahub.ingestion.source.sql.sql_generic_profiler import ( GenericProfiler, TableProfilerRequest, @@ -25,12 +22,6 @@ logger = logging.getLogger(__name__) -@dataclasses.dataclass -class BigqueryProfilerRequest(GEProfilerRequest): - table: BigqueryTable - profile_table_level_only: bool = False - - class BigqueryProfiler(GenericProfiler): config: BigQueryV2Config report: BigQueryV2Report @@ -183,84 +174,54 @@ def get_workunits( ) # Emit the profile work unit - profile_request = self.get_bigquery_profile_request( - project=project_id, dataset=dataset, table=table - ) + profile_request = self.get_profile_request(table, dataset, project_id) if profile_request is not None: + self.report.report_entity_profiled(profile_request.pretty_name) profile_requests.append(profile_request) if len(profile_requests) == 0: return - yield from self.generate_wu_from_profile_requests(profile_requests) - - def generate_wu_from_profile_requests( - self, profile_requests: List[BigqueryProfilerRequest] - ) -> Iterable[MetadataWorkUnit]: - table_profile_requests = cast(List[TableProfilerRequest], profile_requests) - for request, profile in self.generate_profiles( - table_profile_requests, + yield from self.generate_profile_workunits( + profile_requests, self.config.profiling.max_workers, platform=self.platform, profiler_args=self.get_profile_args(), - ): - if request is None or profile is None: - continue - - request = cast(BigqueryProfilerRequest, request) - profile.sizeInBytes = request.table.size_in_bytes - # If table is partitioned we profile only one partition (if nothing set then the last one) - # but for table level we can use the rows_count from the table metadata - # This way even though column statistics only 
reflects one partition data but the rows count - # shows the proper count. - if profile.partitionSpec and profile.partitionSpec.partition: - profile.rowCount = request.table.rows_count - - dataset_name = request.pretty_name - dataset_urn = make_dataset_urn_with_platform_instance( - self.platform, - dataset_name, - self.config.platform_instance, - self.config.env, - ) - # We don't add to the profiler state if we only do table level profiling as it always happens - if self.state_handler and not request.profile_table_level_only: - self.state_handler.add_to_state( - dataset_urn, int(datetime.now().timestamp() * 1000) - ) - - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=profile - ).as_workunit() + ) - def get_bigquery_profile_request( - self, project: str, dataset: str, table: BigqueryTable - ) -> Optional[BigqueryProfilerRequest]: - skip_profiling = False - profile_table_level_only = self.config.profiling.profile_table_level_only - dataset_name = BigqueryTableIdentifier( - project_id=project, dataset=dataset, table=table.name + def get_dataset_name(self, table_name: str, schema_name: str, db_name: str) -> str: + return BigqueryTableIdentifier( + project_id=db_name, dataset=schema_name, table=table_name ).get_table_name() - if not self.is_dataset_eligible_for_profiling( - dataset_name, table.last_altered, table.size_in_bytes, table.rows_count - ): - profile_table_level_only = True - self.report.num_tables_not_eligible_profiling[f"{project}.{dataset}"] += 1 - if not table.column_count: - skip_profiling = True + def get_batch_kwargs( + self, table: BaseTable, schema_name: str, db_name: str + ) -> dict: + return dict( + schema=db_name, # + table=f"{schema_name}.{table.name}", # . + ) - if skip_profiling: - if self.config.profiling.report_dropped_profiles: - self.report.report_dropped(f"profile of {dataset_name}") + def get_profile_request( + self, table: BaseTable, schema_name: str, db_name: str + ) -> Optional[TableProfilerRequest]: + profile_request = super().get_profile_request(table, schema_name, db_name) + + if not profile_request: return None + # Below code handles profiling changes required for partitioned or sharded tables + # 1. Skip profile if partition profiling is disabled. + # 2. 
Else update `profile_request.batch_kwargs` with partition and custom_sql + + bq_table = cast(BigqueryTable, table) (partition, custom_sql) = self.generate_partition_profiler_query( - project, dataset, table, self.config.profiling.partition_datetime + db_name, schema_name, bq_table, self.config.profiling.partition_datetime ) - if partition is None and table.partition_info: + + if partition is None and bq_table.partition_info: self.report.report_warning( "profile skipped as partitioned table is empty or partition id or type was invalid", - dataset_name, + profile_request.pretty_name, ) return None if ( @@ -268,24 +229,20 @@ def get_bigquery_profile_request( and not self.config.profiling.partition_profiling_enabled ): logger.debug( - f"{dataset_name} and partition {partition} is skipped because profiling.partition_profiling_enabled property is disabled" + f"{profile_request.pretty_name} and partition {partition} is skipped because profiling.partition_profiling_enabled property is disabled" ) self.report.profiling_skipped_partition_profiling_disabled.append( - dataset_name + profile_request.pretty_name ) return None - self.report.report_entity_profiled(dataset_name) - logger.debug(f"Preparing profiling request for {dataset_name}") - profile_request = BigqueryProfilerRequest( - pretty_name=dataset_name, - batch_kwargs=dict( - schema=project, - table=f"{dataset}.{table.name}", - custom_sql=custom_sql, - partition=partition, - ), - table=table, - profile_table_level_only=profile_table_level_only, - ) + if partition: + logger.debug("Updating profiling request for partitioned/sharded tables") + profile_request.batch_kwargs.update( + dict( + custom_sql=custom_sql, + partition=partition, + ) + ) + return profile_request diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index 01e083d566168..9f6ac9dd21164 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -273,6 +273,7 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase): partition: Optional[str] config: GEProfilingConfig report: SQLSourceReport + custom_sql: Optional[str] query_combiner: SQLAlchemyQueryCombiner @@ -596,16 +597,8 @@ def generate_dataset_profile( # noqa: C901 (complexity) "catch_exceptions", self.config.catch_exceptions ) - profile = DatasetProfileClass(timestampMillis=get_sys_time()) - if self.partition: - profile.partitionSpec = PartitionSpecClass(partition=self.partition) - elif self.config.limit and self.config.offset: - profile.partitionSpec = PartitionSpecClass( - type=PartitionTypeClass.QUERY, - partition=json.dumps( - dict(limit=self.config.limit, offset=self.config.offset) - ), - ) + profile = self.init_profile() + profile.fieldProfiles = [] self._get_dataset_rows(profile) @@ -740,6 +733,24 @@ def generate_dataset_profile( # noqa: C901 (complexity) self.query_combiner.flush() return profile + def init_profile(self): + profile = DatasetProfileClass(timestampMillis=get_sys_time()) + if self.partition: + profile.partitionSpec = PartitionSpecClass(partition=self.partition) + elif self.config.limit: + profile.partitionSpec = PartitionSpecClass( + type=PartitionTypeClass.QUERY, + partition=json.dumps( + dict(limit=self.config.limit, offset=self.config.offset) + ), + ) + elif self.custom_sql: + profile.partitionSpec = PartitionSpecClass( + type=PartitionTypeClass.QUERY, partition="SAMPLE" + ) + + return profile + def 
update_dataset_batch_use_sampling(self, profile: DatasetProfileClass) -> None: if ( self.dataset.engine.dialect.name.lower() == BIGQUERY @@ -1064,6 +1075,7 @@ def _generate_single_profile( partition, self.config, self.report, + custom_sql, query_combiner, ).generate_dataset_profile() diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py index 77761c529ba0b..24a3e520d8caf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py @@ -157,12 +157,12 @@ class GEProfilingConfig(ConfigModel): ) use_sampling: bool = Field( default=True, - description="Whether to profile column level stats on sample of table. Only BigQuery supports this. " + description="Whether to profile column level stats on sample of table. Only BigQuery and Snowflake support this. " "If enabled, profiling is done on rows sampled from table. Sampling is not done for smaller tables. ", ) sample_size: int = Field( - default=1000, + default=10000, description="Number of rows to be sampled from table for column level profiling." "Applicable only if `use_sampling` is set to True.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/profile.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/profile.py index e983734082b1d..771636e8498a3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/profile.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/profile.py @@ -1,33 +1,19 @@ -import dataclasses import logging -from datetime import datetime -from typing import Dict, Iterable, List, Optional, Union, cast +from typing import Dict, Iterable, List, Optional, Union -from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance -from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.ge_data_profiler import GEProfilerRequest from datahub.ingestion.source.redshift.config import RedshiftConfig from datahub.ingestion.source.redshift.redshift_schema import ( RedshiftTable, RedshiftView, ) from datahub.ingestion.source.redshift.report import RedshiftReport -from datahub.ingestion.source.sql.sql_generic_profiler import ( - GenericProfiler, - TableProfilerRequest, -) +from datahub.ingestion.source.sql.sql_generic_profiler import GenericProfiler from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler logger = logging.getLogger(__name__) -@dataclasses.dataclass -class RedshiftProfilerRequest(GEProfilerRequest): - table: Union[RedshiftTable, RedshiftView] - profile_table_level_only: bool = False - - class RedshiftProfiler(GenericProfiler): config: RedshiftConfig report: RedshiftReport @@ -63,80 +49,21 @@ def get_workunits( continue for table in tables[db].get(schema, {}): # Emit the profile work unit - profile_request = self.get_redshift_profile_request( - table, schema, db - ) + profile_request = self.get_profile_request(table, schema, db) if profile_request is not None: + self.report.report_entity_profiled(profile_request.pretty_name) profile_requests.append(profile_request) if len(profile_requests) == 0: continue - table_profile_requests = cast(List[TableProfilerRequest], profile_requests) - for request, profile in self.generate_profiles( - table_profile_requests, + + yield from self.generate_profile_workunits( + profile_requests, 
self.config.profiling.max_workers, db, platform=self.platform, profiler_args=self.get_profile_args(), - ): - if profile is None: - continue - request = cast(RedshiftProfilerRequest, request) - - profile.sizeInBytes = request.table.size_in_bytes - dataset_name = request.pretty_name - dataset_urn = make_dataset_urn_with_platform_instance( - self.platform, - dataset_name, - self.config.platform_instance, - self.config.env, - ) - - # We don't add to the profiler state if we only do table level profiling as it always happens - if self.state_handler and not request.profile_table_level_only: - self.state_handler.add_to_state( - dataset_urn, int(datetime.now().timestamp() * 1000) - ) - - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=profile - ).as_workunit() - - def get_redshift_profile_request( - self, - table: Union[RedshiftTable, RedshiftView], - schema_name: str, - db_name: str, - ) -> Optional[RedshiftProfilerRequest]: - skip_profiling = False - profile_table_level_only = self.config.profiling.profile_table_level_only - dataset_name = f"{db_name}.{schema_name}.{table.name}".lower() - if not self.is_dataset_eligible_for_profiling( - dataset_name, table.last_altered, table.size_in_bytes, table.rows_count - ): - # Profile only table level if dataset is filtered from profiling - # due to size limits alone - if self.is_dataset_eligible_for_profiling( - dataset_name, table.last_altered, 0, 0 - ): - profile_table_level_only = True - else: - skip_profiling = True - - if len(table.columns) == 0: - skip_profiling = True - - if skip_profiling: - if self.config.profiling.report_dropped_profiles: - self.report.report_dropped(f"profile of {dataset_name}") - return None + ) - self.report.report_entity_profiled(dataset_name) - logger.debug(f"Preparing profiling request for {dataset_name}") - profile_request = RedshiftProfilerRequest( - pretty_name=dataset_name, - batch_kwargs=dict(schema=schema_name, table=table.name), - table=table, - profile_table_level_only=profile_table_level_only, - ) - return profile_request + def get_dataset_name(self, table_name: str, schema_name: str, db_name: str) -> str: + return f"{db_name}.{schema_name}.{table_name}".lower() diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py index 5f5e8e4bcdea3..24275dcdff34d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py @@ -1,20 +1,12 @@ -import dataclasses import logging -from datetime import datetime -from typing import Callable, Dict, Iterable, List, Optional, cast +from typing import Callable, Dict, Iterable, List, Optional from snowflake.sqlalchemy import snowdialect from sqlalchemy import create_engine, inspect from sqlalchemy.sql import sqltypes -from datahub.configuration.pattern_utils import is_schema_allowed -from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance -from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.ge_data_profiler import ( - DatahubGEProfiler, - GEProfilerRequest, -) +from datahub.ingestion.source.ge_data_profiler import DatahubGEProfiler from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery from 
datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report @@ -23,10 +15,8 @@ SnowflakeTable, ) from datahub.ingestion.source.snowflake.snowflake_utils import SnowflakeCommonMixin -from datahub.ingestion.source.sql.sql_generic_profiler import ( - GenericProfiler, - TableProfilerRequest, -) +from datahub.ingestion.source.sql.sql_generic import BaseTable +from datahub.ingestion.source.sql.sql_generic_profiler import GenericProfiler from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler snowdialect.ischema_names["GEOGRAPHY"] = sqltypes.NullType @@ -35,12 +25,6 @@ logger = logging.getLogger(__name__) -@dataclasses.dataclass -class SnowflakeProfilerRequest(GEProfilerRequest): - table: SnowflakeTable - profile_table_level_only: bool = False - - class SnowflakeProfiler(GenericProfiler, SnowflakeCommonMixin): def __init__( self, @@ -65,101 +49,52 @@ def get_workunits( profile_requests = [] for schema in database.schemas: - if not is_schema_allowed( - self.config.schema_pattern, - schema.name, - database.name, - self.config.match_fully_qualified_names, - ): - continue - for table in db_tables[schema.name]: - profile_request = self.get_snowflake_profile_request( + profile_request = self.get_profile_request( table, schema.name, database.name ) if profile_request is not None: + self.report.report_entity_profiled(profile_request.pretty_name) profile_requests.append(profile_request) if len(profile_requests) == 0: return - table_profile_requests = cast(List[TableProfilerRequest], profile_requests) - - for request, profile in self.generate_profiles( - table_profile_requests, + yield from self.generate_profile_workunits( + profile_requests, self.config.profiling.max_workers, database.name, platform=self.platform, profiler_args=self.get_profile_args(), - ): - if profile is None: - continue - profile.sizeInBytes = cast( - SnowflakeProfilerRequest, request - ).table.size_in_bytes - dataset_name = request.pretty_name - dataset_urn = make_dataset_urn_with_platform_instance( - self.platform, - dataset_name, - self.config.platform_instance, - self.config.env, - ) - - # We don't add to the profiler state if we only do table level profiling as it always happens - if self.state_handler: - self.state_handler.add_to_state( - dataset_urn, int(datetime.now().timestamp() * 1000) - ) - - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=profile - ).as_workunit() + ) - def get_snowflake_profile_request( - self, - table: SnowflakeTable, - schema_name: str, - db_name: str, - ) -> Optional[SnowflakeProfilerRequest]: - skip_profiling = False - profile_table_level_only = self.config.profiling.profile_table_level_only - dataset_name = self.get_dataset_identifier(table.name, schema_name, db_name) - if not self.is_dataset_eligible_for_profiling( - dataset_name, table.last_altered, table.size_in_bytes, table.rows_count + def get_dataset_name(self, table_name: str, schema_name: str, db_name: str) -> str: + return self.get_dataset_identifier(table_name, schema_name, db_name) + + def get_batch_kwargs( + self, table: BaseTable, schema_name: str, db_name: str + ) -> dict: + custom_sql = None + if ( + not self.config.profiling.limit + and self.config.profiling.use_sampling + and table.rows_count + and table.rows_count > self.config.profiling.sample_size ): - # Profile only table level if dataset is filtered from profiling - # due to size limits alone - if self.is_dataset_eligible_for_profiling( - dataset_name, table.last_altered, 0, 0 - ): - profile_table_level_only = True 
- else: - skip_profiling = True - - if len(table.columns) == 0: - skip_profiling = True - - if skip_profiling: - if self.config.profiling.report_dropped_profiles: - self.report.report_dropped(f"profile of {dataset_name}") - return None - - self.report.report_entity_profiled(dataset_name) - logger.debug(f"Preparing profiling request for {dataset_name}") - profile_request = SnowflakeProfilerRequest( - pretty_name=dataset_name, - batch_kwargs=dict( - schema=schema_name, - table=table.name, - # Lowercase/Mixedcase table names in Snowflake do not work by default. - # We need to pass `use_quoted_name=True` for such tables as mentioned here - - # https://github.com/great-expectations/great_expectations/pull/2023 - use_quoted_name=(table.name != table.name.upper()), - ), - table=table, - profile_table_level_only=profile_table_level_only, - ) - return profile_request + # GX creates a temporary table from query if query is passed as batch kwargs. + # We are using fraction-based sampling here, instead of fixed-size sampling because + # Fixed-size sampling can be slower than equivalent fraction-based sampling + # as per https://docs.snowflake.com/en/sql-reference/constructs/sample#performance-considerations + sample_pc = 100 * self.config.profiling.sample_size / table.rows_count + custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.3f})' + return { + **super().get_batch_kwargs(table, schema_name, db_name), + # Lowercase/Mixedcase table names in Snowflake do not work by default. + # We need to pass `use_quoted_name=True` for such tables as mentioned here - + # https://github.com/great-expectations/great_expectations/pull/2023 + "use_quoted_name": (table.name != table.name.upper()), + "custom_sql": custom_sql, + } def get_profiler_instance( self, db_name: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py index 344c114d464a9..aaeee5717a867 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py @@ -1,12 +1,15 @@ import logging +from abc import abstractmethod from dataclasses import dataclass, field from datetime import datetime, timedelta, timezone -from typing import Dict, Iterable, List, Optional, Tuple, Union, cast +from typing import Dict, Iterable, List, Optional, Union, cast from sqlalchemy import create_engine, inspect from sqlalchemy.engine.reflection import Inspector from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.ge_data_profiler import ( DatahubGEProfiler, GEProfilerRequest, @@ -16,7 +19,7 @@ from datahub.ingestion.source.sql.sql_generic import BaseTable, BaseView from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProfile -from datahub.metadata.schema_classes import DatasetProfileClass +from datahub.metadata.com.linkedin.pegasus2avro.timeseries import PartitionType from datahub.utilities.stats_collections import TopKDict, int_top_k_dict @@ -63,14 +66,14 @@ def __init__( self.platform = platform self.state_handler = state_handler - def generate_profiles( + def generate_profile_workunits( self, requests: 
List[TableProfilerRequest], max_workers: int, db_name: Optional[str] = None, platform: Optional[str] = None, profiler_args: Optional[Dict] = None, - ) -> Iterable[Tuple[GEProfilerRequest, Optional[DatasetProfileClass]]]: + ) -> Iterable[MetadataWorkUnit]: ge_profile_requests: List[GEProfilerRequest] = [ cast(GEProfilerRequest, request) for request in requests @@ -80,21 +83,109 @@ def generate_profiles( request for request in requests if request.profile_table_level_only ] for request in table_level_profile_requests: - profile = DatasetProfile( + table_level_profile = DatasetProfile( timestampMillis=int(datetime.now().timestamp() * 1000), columnCount=request.table.column_count, rowCount=request.table.rows_count, sizeInBytes=request.table.size_in_bytes, ) - yield (request, profile) + dataset_urn = self.dataset_urn_builder(request.pretty_name) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, aspect=table_level_profile + ).as_workunit() if not ge_profile_requests: return # Otherwise, if column level profiling is enabled, use GE profiler. ge_profiler = self.get_profiler_instance(db_name) - yield from ge_profiler.generate_profiles( + + for ge_profiler_request, profile in ge_profiler.generate_profiles( ge_profile_requests, max_workers, platform, profiler_args + ): + if profile is None: + continue + + request = cast(TableProfilerRequest, ge_profiler_request) + profile.sizeInBytes = request.table.size_in_bytes + + # If table is partitioned we profile only one partition (if nothing set then the last one) + # but for table level we can use the rows_count from the table metadata + # This way even though column statistics only reflects one partition data but the rows count + # shows the proper count. + if ( + profile.partitionSpec + and profile.partitionSpec.type != PartitionType.FULL_TABLE + ): + profile.rowCount = request.table.rows_count + + dataset_urn = self.dataset_urn_builder(request.pretty_name) + + # We don't add to the profiler state if we only do table level profiling as it always happens + if self.state_handler: + self.state_handler.add_to_state( + dataset_urn, int(datetime.now().timestamp() * 1000) + ) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, aspect=profile + ).as_workunit() + + def dataset_urn_builder(self, dataset_name: str) -> str: + return make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + self.config.env, + ) + + @abstractmethod + def get_dataset_name(self, table_name: str, schema_name: str, db_name: str) -> str: + pass + + def get_profile_request( + self, table: BaseTable, schema_name: str, db_name: str + ) -> Optional[TableProfilerRequest]: + skip_profiling = False + profile_table_level_only = self.config.profiling.profile_table_level_only + dataset_name = self.get_dataset_name(table.name, schema_name, db_name) + if not self.is_dataset_eligible_for_profiling( + dataset_name, table.last_altered, table.size_in_bytes, table.rows_count + ): + # Profile only table level if dataset is filtered from profiling + # due to size limits alone + if self.is_dataset_eligible_for_profiling( + dataset_name, table.last_altered, 0, 0 + ): + profile_table_level_only = True + else: + skip_profiling = True + self.report.num_tables_not_eligible_profiling[ + f"{db_name}.{schema_name}" + ] += 1 + + if table.column_count == 0: + skip_profiling = True + + if skip_profiling: + if self.config.profiling.report_dropped_profiles: + self.report.report_dropped(f"profile of {dataset_name}") + return None + + 
logger.debug(f"Preparing profiling request for {dataset_name}") + profile_request = TableProfilerRequest( + pretty_name=dataset_name, + batch_kwargs=self.get_batch_kwargs(table, schema_name, db_name), + table=table, + profile_table_level_only=profile_table_level_only, + ) + return profile_request + + def get_batch_kwargs( + self, table: BaseTable, schema_name: str, db_name: str + ) -> dict: + return dict( + schema=schema_name, + table=table.name, ) def get_inspectors(self) -> Iterable[Inspector]: From c0feceb76fbf607e2883b7f2960eaf6c757629e4 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Fri, 6 Oct 2023 17:10:24 -0400 Subject: [PATCH 19/98] test(): Manage Access Tokens Cypress test (#8936) --- .../src/app/settings/AccessTokenModal.tsx | 4 +- .../src/app/settings/AccessTokens.tsx | 7 ++- .../src/app/settings/CreateTokenModal.tsx | 18 +++++--- .../e2e/settings/manage_access_tokens.js | 43 +++++++++++++++++++ .../tests/cypress/cypress/support/commands.js | 6 +++ 5 files changed, 70 insertions(+), 8 deletions(-) create mode 100644 smoke-test/tests/cypress/cypress/e2e/settings/manage_access_tokens.js diff --git a/datahub-web-react/src/app/settings/AccessTokenModal.tsx b/datahub-web-react/src/app/settings/AccessTokenModal.tsx index 0303db656c2a8..10427210d0692 100644 --- a/datahub-web-react/src/app/settings/AccessTokenModal.tsx +++ b/datahub-web-react/src/app/settings/AccessTokenModal.tsx @@ -60,7 +60,7 @@ export const AccessTokenModal = ({ visible, onClose, accessToken, expiresInText onCancel={onClose} footer={ <> - @@ -81,7 +81,7 @@ export const AccessTokenModal = ({ visible, onClose, accessToken, expiresInText Token{expiresInText} -
{accessToken}
+
{accessToken}
diff --git a/datahub-web-react/src/app/settings/AccessTokens.tsx b/datahub-web-react/src/app/settings/AccessTokens.tsx index 02ff3f1cd304c..c7a015de392da 100644 --- a/datahub-web-react/src/app/settings/AccessTokens.tsx +++ b/datahub-web-react/src/app/settings/AccessTokens.tsx @@ -199,7 +199,12 @@ export const AccessTokens = () => { key: 'x', render: (_, record: any) => ( - diff --git a/datahub-web-react/src/app/settings/CreateTokenModal.tsx b/datahub-web-react/src/app/settings/CreateTokenModal.tsx index 6038a86e23303..3cc446651efcb 100644 --- a/datahub-web-react/src/app/settings/CreateTokenModal.tsx +++ b/datahub-web-react/src/app/settings/CreateTokenModal.tsx @@ -117,10 +117,15 @@ export default function CreateTokenModal({ currentUserUrn, visible, onClose, onC onCancel={onModalClose} footer={ <> - - @@ -148,18 +153,21 @@ export default function CreateTokenModal({ currentUserUrn, visible, onClose, onC ]} hasFeedback > - + Description}> An optional description for your new token. - + Expires in - + {ACCESS_TOKEN_DURATIONS.map((duration) => ( diff --git a/smoke-test/tests/cypress/cypress/e2e/settings/manage_access_tokens.js b/smoke-test/tests/cypress/cypress/e2e/settings/manage_access_tokens.js new file mode 100644 index 0000000000000..7a77c2b77df5b --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/settings/manage_access_tokens.js @@ -0,0 +1,43 @@ +import { aliasQuery, hasOperationName } from "../utils"; +const test_id = Math.floor(Math.random() * 100000); + +describe("manage access tokens", () => { + before(() => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + aliasQuery(req, "appConfig"); + }); + }); + + const setTokenAuthEnabledFlag = (isOn) => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + if (hasOperationName(req, "appConfig")) { + req.reply((res) => { + res.body.data.appConfig.authConfig.tokenAuthEnabled = isOn; + }); + } + }); + }; + + it("create and revoke access token", () => { + //create access token, verify token on ui + setTokenAuthEnabledFlag(true); + cy.loginWithCredentials(); + cy.goToAccessTokenSettings(); + cy.clickOptionWithTestId("add-token-button"); + cy.enterTextInTestId("create-access-token-name", "Token Name" + test_id); + cy.enterTextInTestId("create-access-token-description", "Token Description" + test_id); + cy.clickOptionWithTestId("create-access-token-button"); + cy.waitTextVisible("New Personal Access Token"); + cy.get('[data-testid="access-token-value"]').should("be.visible"); + cy.get('[data-testid="access-token-value"]').invoke('text').should('match', /^[a-zA-Z0-9-_]+\.[a-zA-Z0-9-_]+\.[a-zA-Z0-9-_]+$/); + cy.clickOptionWithTestId("access-token-modal-close-button"); + //revoke access token, verify token removed from ui + cy.waitTextVisible("Token Name" + test_id); + cy.waitTextVisible("Token Description" + test_id); + cy.clickOptionWithTestId("revoke-token-button"); + cy.waitTextVisible("Are you sure you want to revoke this token?"); + cy.clickOptionWithText("Yes"); + cy.ensureTextNotPresent("Token Name" + test_id); + cy.ensureTextNotPresent("Token Description" + test_id); + }); +}); \ No newline at end of file diff --git a/smoke-test/tests/cypress/cypress/support/commands.js b/smoke-test/tests/cypress/cypress/support/commands.js index 8bfe7305c001f..64bc1253fc383 100644 --- a/smoke-test/tests/cypress/cypress/support/commands.js +++ b/smoke-test/tests/cypress/cypress/support/commands.js @@ -84,6 +84,12 @@ Cypress.Commands.add("goToOwnershipTypesSettings", () => { cy.waitTextVisible("Manage Ownership"); }); 
+Cypress.Commands.add("goToAccessTokenSettings", () => { + cy.visit("/settings/tokens"); + cy.waitTextVisible("Manage Access Tokens"); + cy.wait(3000); +}); + Cypress.Commands.add("goToIngestionPage", () => { cy.visit("/ingestion"); cy.waitTextVisible("Manage Ingestion"); From b191abbc5bb32a0a3c895facdff14d146da9fb74 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Fri, 6 Oct 2023 17:11:57 -0400 Subject: [PATCH 20/98] test(): Nested domains cypress test (#8879) --- .../src/app/domain/CreateDomainModal.tsx | 5 +- .../nestedDomains/ManageDomainsPageV2.tsx | 7 ++- .../domainNavigator/DomainNode.tsx | 2 +- .../shared/EntityDropdown/EntityDropdown.tsx | 4 +- .../shared/EntityDropdown/MoveDomainModal.tsx | 5 +- .../cypress/e2e/domains/nested_domains.js | 53 +++++++++++++++++++ 6 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 smoke-test/tests/cypress/cypress/e2e/domains/nested_domains.js diff --git a/datahub-web-react/src/app/domain/CreateDomainModal.tsx b/datahub-web-react/src/app/domain/CreateDomainModal.tsx index ca1bc30596003..606444d34bdc9 100644 --- a/datahub-web-react/src/app/domain/CreateDomainModal.tsx +++ b/datahub-web-react/src/app/domain/CreateDomainModal.tsx @@ -191,7 +191,10 @@ export default function CreateDomainModal({ onClose, onCreate }: Props) { rules={[{ whitespace: true }, { min: 1, max: 500 }]} hasFeedback > - + diff --git a/datahub-web-react/src/app/domain/nestedDomains/ManageDomainsPageV2.tsx b/datahub-web-react/src/app/domain/nestedDomains/ManageDomainsPageV2.tsx index 0e5c035df00c1..b69f0c5458b5d 100644 --- a/datahub-web-react/src/app/domain/nestedDomains/ManageDomainsPageV2.tsx +++ b/datahub-web-react/src/app/domain/nestedDomains/ManageDomainsPageV2.tsx @@ -42,7 +42,12 @@ export default function ManageDomainsPageV2() {
-
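// Context for the hunks in this commit: the React changes above and below mostly thread
// `data-testid` attributes through to buttons, modals and menu items so that the new
// nested-domains Cypress spec can target them. As a minimal sketch (an assumption, not part
// of this patch) of how a test-id helper such as the `clickOptionWithTestId` command used in
// nested_domains.js could work -- the real command is presumably defined in
// smoke-test/tests/cypress/cypress/support/commands.js and may differ in detail:
Cypress.Commands.add("clickOptionWithTestId", (testId) => {
  // Look up the element by its data-testid attribute and click it.
  cy.get(`[data-testid="${testId}"]`).first().click({ force: true });
});
// Example usage, mirroring the spec added later in this commit:
// cy.clickOptionWithTestId("domains-new-domain-button");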
diff --git a/datahub-web-react/src/app/domain/nestedDomains/domainNavigator/DomainNode.tsx b/datahub-web-react/src/app/domain/nestedDomains/domainNavigator/DomainNode.tsx index 09c8e13853bb7..bf70bd043fd4a 100644 --- a/datahub-web-react/src/app/domain/nestedDomains/domainNavigator/DomainNode.tsx +++ b/datahub-web-react/src/app/domain/nestedDomains/domainNavigator/DomainNode.tsx @@ -103,7 +103,7 @@ export default function DomainNode({ domain, numDomainChildren, domainUrnToHide, return ( <> - + {hasDomainChildren && ( diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/EntityDropdown.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/EntityDropdown.tsx index be975249b2670..bfb7ff7e540c4 100644 --- a/datahub-web-react/src/app/entity/shared/EntityDropdown/EntityDropdown.tsx +++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/EntityDropdown.tsx @@ -203,7 +203,7 @@ function EntityDropdown(props: Props) { disabled={isMoveDisabled(entityType, entityData, me.platformPrivileges)} onClick={() => setIsMoveModalVisible(true)} > - +  Move @@ -223,7 +223,7 @@ function EntityDropdown(props: Props) { : undefined } > - +  Delete diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveDomainModal.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveDomainModal.tsx index cdbf6fdabf3c9..3826f934c1c25 100644 --- a/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveDomainModal.tsx +++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveDomainModal.tsx @@ -67,6 +67,7 @@ function MoveDomainModal(props: Props) { return ( Cancel - + } > diff --git a/smoke-test/tests/cypress/cypress/e2e/domains/nested_domains.js b/smoke-test/tests/cypress/cypress/e2e/domains/nested_domains.js new file mode 100644 index 0000000000000..a2d4de0f51659 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/domains/nested_domains.js @@ -0,0 +1,53 @@ +const domainName = "CypressNestedDomain"; +const domainDescription = "CypressNestedDomainDescription"; + +describe("nested domains test", () => { + + it("create a domain, move under parent, remove domain", () => { + // Create a new domain without a parent + cy.loginWithCredentials(); + cy.goToDomainList(); + cy.clickOptionWithTestId("domains-new-domain-button"); + cy.get('[data-testid="create-domain-name"]').click().type(domainName); + cy.get('[data-testid="create-domain-description"]').click().type(domainDescription); + cy.clickOptionWithTestId("create-domain-button"); + cy.waitTextVisible(domainName); + + // Ensure the new domain has no parent in the navigation sidebar + cy.waitTextVisible(domainDescription); + + // Move a domain from the root level to be under a parent domain + cy.clickOptionWithText(domainName); + cy.openThreeDotDropdown(); + cy.clickOptionWithTestId("entity-menu-move-button"); + cy.get('[data-testid="move-domain-modal"]').contains("Marketing").click({force: true}); + cy.get('[data-testid="move-domain-modal"]').contains("Marketing").should("be.visible"); + cy.clickOptionWithTestId("move-domain-modal-move-button").wait(5000); + + // Wnsure domain is no longer on the sidebar navigator at the top level but shows up under the parent + cy.goToDomainList(); + cy.ensureTextNotPresent(domainName); + cy.ensureTextNotPresent(domainDescription); + cy.waitTextVisible("1 sub-domain"); + + // Move a domain from under a parent domain to the root level + cy.get('[data-testid="domain-list-item"]').contains("Marketing").prev().click(); + cy.clickOptionWithText(domainName); + cy.openThreeDotDropdown(); + 
cy.clickOptionWithTestId("entity-menu-move-button"); + cy.clickOptionWithTestId("move-domain-modal-move-button").wait(5000); + cy.goToDomainList(); + cy.waitTextVisible(domainName); + cy.waitTextVisible(domainDescription); + + // Delete a domain + cy.clickOptionWithText(domainName).wait(3000); + cy.openThreeDotDropdown(); + cy.clickOptionWithTestId("entity-menu-delete-button"); + cy.waitTextVisible("Are you sure you want to remove this Domain?"); + cy.clickOptionWithText("Yes"); + cy.waitTextVisible("Deleted Domain!"); + cy.ensureTextNotPresent(domainName); + cy.ensureTextNotPresent(domainDescription); + }); +}); \ No newline at end of file From 93958302d529a65021c78f880347930297854692 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Sun, 8 Oct 2023 13:26:48 -0400 Subject: [PATCH 21/98] feat(models/assertion): Add SQL Assertions (#8969) --- .../com/linkedin/assertion/AssertionInfo.pdl | 17 ++++- .../linkedin/assertion/SqlAssertionInfo.pdl | 67 +++++++++++++++++++ 2 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/SqlAssertionInfo.pdl diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl index ae2a58028057b..e161270145a88 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl @@ -32,6 +32,11 @@ record AssertionInfo includes CustomProperties, ExternalReference { */ VOLUME + /** + * A raw SQL-statement based assertion + */ + SQL + /** * A schema or structural assertion. * @@ -56,7 +61,12 @@ record AssertionInfo includes CustomProperties, ExternalReference { volumeAssertion: optional VolumeAssertionInfo /** - * An schema Assertion definition. This field is populated when the type is DATASET_SCHEMA + * A SQL Assertion definition. This field is populated when the type is SQL. + */ + sqlAssertion: optional SqlAssertionInfo + + /** + * An schema Assertion definition. This field is populated when the type is DATA_SCHEMA */ schemaAssertion: optional SchemaAssertionInfo @@ -67,4 +77,9 @@ record AssertionInfo includes CustomProperties, ExternalReference { * the platform where it was ingested from. */ source: optional AssertionSource + + /** + * An optional human-readable description of the assertion + */ + description: optional string } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/SqlAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/SqlAssertionInfo.pdl new file mode 100644 index 0000000000000..f6ce738252f35 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/SqlAssertionInfo.pdl @@ -0,0 +1,67 @@ +namespace com.linkedin.assertion + +import com.linkedin.common.Urn +import com.linkedin.dataset.DatasetFilter + +/** +* Attributes defining a SQL Assertion +*/ +record SqlAssertionInfo { + /** + * The type of the SQL assertion being monitored. + */ + @Searchable = {} + type: enum SqlAssertionType { + /** + * A SQL Metric Assertion, e.g. one based on a numeric value returned by an arbitrary SQL query. + */ + METRIC + /** + * A SQL assertion that is evaluated against the CHANGE in a metric assertion + * over time. + */ + METRIC_CHANGE + } + + /** + * The entity targeted by this SQL check. 
+ */ + @Searchable = { + "fieldType": "URN" + } + @Relationship = { + "name": "Asserts", + "entityTypes": [ "dataset" ] + } + entity: Urn + + /** + * The SQL statement to be executed when evaluating the assertion (or computing the metric). + * This should be a valid and complete statement, executable by itself. + * + * Usually this should be a SELECT query statement. + */ + statement: string + + /** + * The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage. + * This value is required if the type is METRIC_CHANGE. + */ + changeType: optional AssertionValueChangeType + + /** + * The operator you'd like to apply to the result of the SQL query. + * + * Note that at this time, only numeric operators are valid inputs: + * GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + * BETWEEN. + */ + operator: AssertionStdOperator + + /** + * The parameters you'd like to provide as input to the operator. + * + * Note that only numeric parameter types are valid inputs: NUMBER. + */ + parameters: AssertionStdParameters +} \ No newline at end of file From 8d175ef7ef1ae8ffada7b2df2fb711ac02a6785d Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Tue, 10 Oct 2023 02:04:25 +0530 Subject: [PATCH 22/98] feat(ingest): incremental lineage source helper (#8941) Co-authored-by: Harshal Sheth --- .../datahub/ingestion/api/source_helpers.py | 138 +++++++++- .../ingestion/source/bigquery_v2/bigquery.py | 3 +- .../source/snowflake/snowflake_v2.py | 9 + .../snowflake_privatelink_golden.json | 243 +++++++++++------ .../integration/snowflake/test_snowflake.py | 2 + .../snowflake/test_snowflake_failures.py | 6 +- .../snowflake/test_snowflake_stateful.py | 3 +- ...l_less_upstreams_in_gms_aspect_golden.json | 106 ++++++++ ...l_more_upstreams_in_gms_aspect_golden.json | 120 +++++++++ .../incremental_table_lineage_golden.json | 41 +++ .../test_incremental_lineage_helper.py | 244 ++++++++++++++++++ .../source_helpers}/test_source_helpers.py | 0 12 files changed, 829 insertions(+), 86 deletions(-) create mode 100644 metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_less_upstreams_in_gms_aspect_golden.json create mode 100644 metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_more_upstreams_in_gms_aspect_golden.json create mode 100644 metadata-ingestion/tests/unit/api/source_helpers/incremental_table_lineage_golden.json create mode 100644 metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py rename metadata-ingestion/tests/unit/{ => api/source_helpers}/test_source_helpers.py (100%) diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index 7fc15cf829678..42f970e97c95f 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -1,3 +1,4 @@ +import copy import logging from datetime import datetime, timezone from typing import ( @@ -15,9 +16,14 @@ ) from datahub.configuration.time_window_config import BaseTimeWindowConfig -from datahub.emitter.mce_builder import make_dataplatform_instance_urn +from datahub.emitter.mce_builder import ( + datahub_guid, + make_dataplatform_instance_urn, + set_aspect, +) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.graph.client import 
DataHubGraph from datahub.metadata.schema_classes import ( BrowsePathEntryClass, BrowsePathsClass, @@ -25,12 +31,17 @@ ChangeTypeClass, ContainerClass, DatasetUsageStatisticsClass, + FineGrainedLineageClass, MetadataChangeEventClass, MetadataChangeProposalClass, StatusClass, + SystemMetadataClass, TagKeyClass, TimeWindowSizeClass, + UpstreamClass, + UpstreamLineageClass, ) +from datahub.specific.dataset import DatasetPatchBuilder from datahub.telemetry import telemetry from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.tag_urn import TagUrn @@ -366,3 +377,128 @@ def _prepend_platform_instance( return [BrowsePathEntryClass(id=urn, urn=urn)] + entries return entries + + +def auto_incremental_lineage( + graph: Optional[DataHubGraph], + incremental_lineage: bool, + include_column_level_lineage: bool, + stream: Iterable[MetadataWorkUnit], +) -> Iterable[MetadataWorkUnit]: + if not incremental_lineage: + yield from stream + return # early exit + + for wu in stream: + lineage_aspect: Optional[UpstreamLineageClass] = wu.get_aspect_of_type( + UpstreamLineageClass + ) + urn = wu.get_urn() + + if lineage_aspect: + if isinstance(wu.metadata, MetadataChangeEventClass): + set_aspect( + wu.metadata, None, UpstreamLineageClass + ) # we'll emit upstreamLineage separately below + if len(wu.metadata.proposedSnapshot.aspects) > 0: + yield wu + + yield _lineage_wu_via_read_modify_write( + graph, urn, lineage_aspect, wu.metadata.systemMetadata + ) if include_column_level_lineage else _convert_upstream_lineage_to_patch( + urn, lineage_aspect, wu.metadata.systemMetadata + ) + else: + yield wu + + +def _convert_upstream_lineage_to_patch( + urn: str, + aspect: UpstreamLineageClass, + system_metadata: Optional[SystemMetadataClass], +) -> MetadataWorkUnit: + patch_builder = DatasetPatchBuilder(urn, system_metadata) + for upstream in aspect.upstreams: + patch_builder.add_upstream_lineage(upstream) + mcp = next(iter(patch_builder.build())) + return MetadataWorkUnit(id=f"{urn}-upstreamLineage", mcp_raw=mcp) + + +def _lineage_wu_via_read_modify_write( + graph: Optional[DataHubGraph], + urn: str, + aspect: UpstreamLineageClass, + system_metadata: Optional[SystemMetadataClass], +) -> MetadataWorkUnit: + if graph is None: + raise ValueError( + "Failed to handle incremental lineage, DataHubGraph is missing. " + "Use `datahub-rest` sink OR provide `datahub-api` config in recipe. 
" + ) + gms_aspect = graph.get_aspect(urn, UpstreamLineageClass) + if gms_aspect: + new_aspect = _merge_upstream_lineage(aspect, gms_aspect) + else: + new_aspect = aspect + + return MetadataChangeProposalWrapper( + entityUrn=urn, aspect=new_aspect, systemMetadata=system_metadata + ).as_workunit() + + +def _merge_upstream_lineage( + new_aspect: UpstreamLineageClass, gms_aspect: UpstreamLineageClass +) -> UpstreamLineageClass: + merged_aspect = copy.deepcopy(gms_aspect) + + upstreams_map: Dict[str, UpstreamClass] = { + upstream.dataset: upstream for upstream in merged_aspect.upstreams + } + + upstreams_updated = False + fine_upstreams_updated = False + + for table_upstream in new_aspect.upstreams: + if table_upstream.dataset not in upstreams_map or ( + table_upstream.auditStamp.time + > upstreams_map[table_upstream.dataset].auditStamp.time + ): + upstreams_map[table_upstream.dataset] = table_upstream + upstreams_updated = True + + if upstreams_updated: + merged_aspect.upstreams = list(upstreams_map.values()) + + if new_aspect.fineGrainedLineages and merged_aspect.fineGrainedLineages: + fine_upstreams_map: Dict[str, FineGrainedLineageClass] = { + get_fine_grained_lineage_key(fine_upstream): fine_upstream + for fine_upstream in merged_aspect.fineGrainedLineages + } + for column_upstream in new_aspect.fineGrainedLineages: + column_upstream_key = get_fine_grained_lineage_key(column_upstream) + + if column_upstream_key not in fine_upstreams_map or ( + column_upstream.confidenceScore + > fine_upstreams_map[column_upstream_key].confidenceScore + ): + fine_upstreams_map[column_upstream_key] = column_upstream + fine_upstreams_updated = True + + if fine_upstreams_updated: + merged_aspect.fineGrainedLineages = list(fine_upstreams_map.values()) + else: + merged_aspect.fineGrainedLineages = ( + new_aspect.fineGrainedLineages or gms_aspect.fineGrainedLineages + ) + + return merged_aspect + + +def get_fine_grained_lineage_key(fine_upstream: FineGrainedLineageClass) -> str: + return datahub_guid( + { + "upstreams": sorted(fine_upstream.upstreams or []), + "downstreams": sorted(fine_upstream.downstreams or []), + "transformOperation": fine_upstream.transformOperation, + } + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index fee181864a2d6..b4a04d96b532b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -461,7 +461,8 @@ def _init_schema_resolver(self) -> SchemaResolver: ) else: logger.warning( - "Failed to load schema info from DataHub as DataHubGraph is missing.", + "Failed to load schema info from DataHub as DataHubGraph is missing. " + "Use `datahub-rest` sink OR provide `datahub-api` config in recipe. 
", ) return SchemaResolver(platform=self.platform, env=self.config.env) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 215116b4c33fb..e0848b5f9ab34 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -4,6 +4,7 @@ import os.path import platform from dataclasses import dataclass +from functools import partial from typing import Callable, Dict, Iterable, List, Optional, Union import pandas as pd @@ -35,6 +36,7 @@ TestableSource, TestConnectionReport, ) +from datahub.ingestion.api.source_helpers import auto_incremental_lineage from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.glossary.classification_mixin import ClassificationHandler from datahub.ingestion.source.common.subtypes import ( @@ -511,6 +513,13 @@ def _init_schema_resolver(self) -> SchemaResolver: def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), + partial( + auto_incremental_lineage, + self.ctx.graph, + self.config.incremental_lineage, + self.config.include_column_lineage + or self.config.include_view_column_lineage, + ), StaleEntityRemovalHandler.create( self, self.config, self.ctx ).workunit_processor, diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json index 7687b99ac8d6d..5057dacd5b0c8 100644 --- a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json +++ b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json @@ -24,7 +24,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -39,7 +40,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -54,7 +56,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -71,7 +74,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -86,7 +90,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -115,7 +120,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -130,7 +136,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -145,7 +152,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -162,7 +170,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": 
"snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -177,7 +186,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -197,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -212,7 +223,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -375,7 +387,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -401,7 +414,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -416,7 +430,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -433,7 +448,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -457,7 +473,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +489,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -635,7 +653,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -661,7 +680,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -676,7 +696,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +714,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -717,7 +739,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -732,7 +755,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -895,7 +919,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -921,7 +946,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + 
"lastRunId": "no-run-id-provided" } }, { @@ -936,7 +962,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -953,7 +980,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -977,7 +1005,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -992,7 +1021,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1155,7 +1185,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1181,7 +1212,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1196,7 +1228,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1213,7 +1246,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1237,7 +1271,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1252,7 +1287,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1415,7 +1451,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1441,7 +1478,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1456,7 +1494,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1473,7 +1512,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1497,7 +1537,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1512,7 +1553,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1675,7 +1717,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1701,7 
+1744,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1716,7 +1760,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1733,7 +1778,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1757,7 +1803,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1772,7 +1819,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1935,7 +1983,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1961,7 +2010,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1976,7 +2026,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1993,7 +2044,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2017,7 +2069,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2032,7 +2085,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2195,7 +2249,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2221,7 +2276,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2236,7 +2292,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2253,7 +2310,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2277,7 +2335,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2292,7 +2351,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2455,7 +2515,8 @@ }, "systemMetadata": { "lastObserved": 
1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2481,7 +2542,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2496,7 +2558,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2513,7 +2576,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2537,7 +2601,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2552,7 +2617,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2715,7 +2781,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2741,7 +2808,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2756,7 +2824,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2773,7 +2842,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2797,7 +2867,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2821,7 +2892,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2845,7 +2917,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2869,7 +2942,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2893,7 +2967,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2917,7 +2992,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2941,7 +3017,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2965,7 +3042,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": 
"snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2989,7 +3067,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3013,7 +3092,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3037,7 +3117,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py index 2c77ace8b53e5..3dafe85ef950a 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py @@ -125,6 +125,7 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): validate_upstreams_against_patterns=False, include_operational_stats=True, email_as_user_identifier=True, + incremental_lineage=False, start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace( tzinfo=timezone.utc ), @@ -213,6 +214,7 @@ def test_snowflake_private_link(pytestconfig, tmp_path, mock_time, mock_datahub_ include_views=False, include_view_lineage=False, include_usage_stats=False, + incremental_lineage=False, include_operational_stats=False, start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace( tzinfo=timezone.utc diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py index bba53c1e97a47..cd53b8f7db4f6 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py @@ -283,10 +283,12 @@ def test_snowflake_unexpected_snowflake_view_lineage_error_causes_pipeline_warni ) snowflake_pipeline_config1 = snowflake_pipeline_config.copy() - cast( + config = cast( SnowflakeV2Config, cast(PipelineConfig, snowflake_pipeline_config1).source.config, - ).include_view_lineage = True + ) + config.include_view_lineage = True + config.incremental_lineage = False pipeline = Pipeline(snowflake_pipeline_config1) pipeline.run() pipeline.raise_from_status() # pipeline should not fail diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py index f72bd5b72d2cd..7e2ac94fa4e35 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py @@ -31,6 +31,7 @@ def stateful_pipeline_config(include_tables: bool) -> PipelineConfig: match_fully_qualified_names=True, schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]), include_tables=include_tables, + incremental_lineage=False, stateful_ingestion=StatefulStaleMetadataRemovalConfig.parse_obj( { "enabled": True, @@ -49,7 +50,7 @@ def stateful_pipeline_config(include_tables: bool) -> PipelineConfig: @freeze_time(FROZEN_TIME) -def test_tableau_stateful(mock_datahub_graph): +def test_stale_metadata_removal(mock_datahub_graph): with mock.patch( 
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", mock_datahub_graph, diff --git a/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_less_upstreams_in_gms_aspect_golden.json b/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_less_upstreams_in_gms_aspect_golden.json new file mode 100644 index 0000000000000..812566143014b --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_less_upstreams_in_gms_aspect_golden.json @@ -0,0 +1,106 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_b)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_c)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_b)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_b)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_c)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_c)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "run-id", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_more_upstreams_in_gms_aspect_golden.json b/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_more_upstreams_in_gms_aspect_golden.json new file mode 100644 index 0000000000000..17f4d10728268 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_more_upstreams_in_gms_aspect_golden.json @@ -0,0 +1,120 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream3,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_a)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream3,PROD),col_a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_b)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_b)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream3,PROD),col_b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_b)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_c)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_c)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream3,PROD),col_c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_c)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_b)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_b)" + ], + 
"confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_c)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_c)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "run-id", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/api/source_helpers/incremental_table_lineage_golden.json b/metadata-ingestion/tests/unit/api/source_helpers/incremental_table_lineage_golden.json new file mode 100644 index 0000000000000..c828373c73080 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/incremental_table_lineage_golden.json @@ -0,0 +1,41 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD)", + "changeType": "PATCH", + "aspectName": "upstreamLineage", + "aspect": { + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aplatform%2Cupstream1%2CPROD%29", + "value": { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD)", + "type": "TRANSFORMED" + } + }, + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aplatform%2Cupstream2%2CPROD%29", + "value": { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD)", + "type": "TRANSFORMED" + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "run-id", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py new file mode 100644 index 0000000000000..4078bda26c743 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py @@ -0,0 +1,244 @@ +from typing import List, Optional +from unittest.mock import MagicMock + +import pytest + +import datahub.metadata.schema_classes as models +from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.source_helpers import auto_incremental_lineage +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.sink.file import write_metadata_file +from tests.test_helpers import mce_helpers + +platform = "platform" +system_metadata = models.SystemMetadataClass(lastObserved=1643871600000, runId="run-id") + + +def make_lineage_aspect( + dataset_name: str, + upstreams: List[str], + timestamp: int = 0, + columns: List[str] = [], + include_cll: bool = False, +) -> models.UpstreamLineageClass: + """ + Generates dataset properties and upstream lineage aspects + with simple column to column lineage between current dataset and all upstreams + """ + + dataset_urn = make_dataset_urn(platform, dataset_name) + return models.UpstreamLineageClass( + upstreams=[ + models.UpstreamClass( + dataset=upstream_urn, + type=models.DatasetLineageTypeClass.TRANSFORMED, + 
auditStamp=models.AuditStampClass( + time=timestamp, actor="urn:li:corpuser:unknown" + ), + ) + for upstream_urn in upstreams + ], + fineGrainedLineages=[ + models.FineGrainedLineageClass( + upstreamType=models.FineGrainedLineageUpstreamTypeClass.FIELD_SET, + downstreamType=models.FineGrainedLineageDownstreamTypeClass.FIELD, + upstreams=[ + make_schema_field_urn(upstream_urn, col) + for upstream_urn in upstreams + ], + downstreams=[make_schema_field_urn(dataset_urn, col)], + ) + for col in columns + ] + if include_cll + else None, + ) + + +def base_table_lineage_aspect() -> models.UpstreamLineageClass: + return make_lineage_aspect( + "dataset1", + upstreams=[ + make_dataset_urn(platform, name) for name in ["upstream1", "upstream2"] + ], + ) + + +def base_cll_aspect(timestamp: int = 0) -> models.UpstreamLineageClass: + return make_lineage_aspect( + "dataset1", + upstreams=[ + make_dataset_urn(platform, name) for name in ["upstream1", "upstream2"] + ], + timestamp=timestamp, + columns=["col_a", "col_b", "col_c"], + include_cll=True, + ) + + +def test_incremental_table_lineage(tmp_path, pytestconfig): + test_resources_dir = pytestconfig.rootpath / "tests/unit/api/source_helpers" + test_file = tmp_path / "incremental_table_lineage.json" + golden_file = test_resources_dir / "incremental_table_lineage_golden.json" + + urn = make_dataset_urn(platform, "dataset1") + aspect = base_table_lineage_aspect() + + processed_wus = auto_incremental_lineage( + graph=None, + incremental_lineage=True, + include_column_level_lineage=False, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=urn, aspect=aspect, systemMetadata=system_metadata + ).as_workunit() + ], + ) + + write_metadata_file( + test_file, + [wu.metadata for wu in processed_wus], + ) + mce_helpers.check_golden_file( + pytestconfig=pytestconfig, output_path=test_file, golden_path=golden_file + ) + + +@pytest.mark.parametrize( + "gms_aspect,current_aspect,output_aspect", + [ + # emitting CLL upstreamLineage over table level upstreamLineage + [ + base_table_lineage_aspect(), + base_cll_aspect(), + base_cll_aspect(), + ], + # emitting upstreamLineage for the first time + [ + None, + base_cll_aspect(), + base_cll_aspect(), + ], + # emitting CLL upstreamLineage over same CLL upstreamLineage + [ + base_cll_aspect(), + base_cll_aspect(), + base_cll_aspect(), + ], + # emitting CLL upstreamLineage over same CLL upstreamLineage but with earlier timestamp + [ + base_cll_aspect(), # default timestamp is 0 + base_cll_aspect(timestamp=1643871600000), + base_cll_aspect(timestamp=1643871600000), + ], + ], +) +def test_incremental_column_level_lineage( + gms_aspect: Optional[models.UpstreamLineageClass], + current_aspect: models.UpstreamLineageClass, + output_aspect: models.UpstreamLineageClass, +) -> None: + mock_graph = MagicMock() + mock_graph.get_aspect.return_value = gms_aspect + dataset_urn = make_dataset_urn(platform, "dataset1") + + processed_wus = auto_incremental_lineage( + graph=mock_graph, + incremental_lineage=True, + include_column_level_lineage=True, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=current_aspect, + systemMetadata=system_metadata, + ).as_workunit() + ], + ) + + wu: MetadataWorkUnit = next(iter(processed_wus)) + aspect = wu.get_aspect_of_type(models.UpstreamLineageClass) + assert aspect == output_aspect + + +def test_incremental_column_lineage_less_upstreams_in_gms_aspect( + tmp_path, pytestconfig +): + test_resources_dir = pytestconfig.rootpath / "tests/unit/api/source_helpers" + test_file = tmp_path 
/ "incremental_cll_less_upstreams_in_gms_aspect.json" + golden_file = ( + test_resources_dir / "incremental_cll_less_upstreams_in_gms_aspect_golden.json" + ) + + urn = make_dataset_urn(platform, "dataset1") + aspect = base_cll_aspect() + + mock_graph = MagicMock() + mock_graph.get_aspect.return_value = make_lineage_aspect( + "dataset1", + upstreams=[make_dataset_urn(platform, name) for name in ["upstream1"]], + columns=["col_a", "col_b", "col_c"], + include_cll=True, + ) + + processed_wus = auto_incremental_lineage( + graph=mock_graph, + incremental_lineage=True, + include_column_level_lineage=True, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=urn, aspect=aspect, systemMetadata=system_metadata + ).as_workunit() + ], + ) + + write_metadata_file( + test_file, + [wu.metadata for wu in processed_wus], + ) + mce_helpers.check_golden_file( + pytestconfig=pytestconfig, output_path=test_file, golden_path=golden_file + ) + + +def test_incremental_column_lineage_more_upstreams_in_gms_aspect( + tmp_path, pytestconfig +): + test_resources_dir = pytestconfig.rootpath / "tests/unit/api/source_helpers" + test_file = tmp_path / "incremental_cll_more_upstreams_in_gms_aspect.json" + golden_file = ( + test_resources_dir / "incremental_cll_more_upstreams_in_gms_aspect_golden.json" + ) + + urn = make_dataset_urn(platform, "dataset1") + aspect = base_cll_aspect() + + mock_graph = MagicMock() + mock_graph.get_aspect.return_value = make_lineage_aspect( + "dataset1", + upstreams=[ + make_dataset_urn(platform, name) + for name in ["upstream1", "upstream2", "upstream3"] + ], + columns=["col_a", "col_b", "col_c"], + include_cll=True, + ) + + processed_wus = auto_incremental_lineage( + graph=mock_graph, + incremental_lineage=True, + include_column_level_lineage=True, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=urn, aspect=aspect, systemMetadata=system_metadata + ).as_workunit() + ], + ) + + write_metadata_file( + test_file, + [wu.metadata for wu in processed_wus], + ) + mce_helpers.check_golden_file( + pytestconfig=pytestconfig, output_path=test_file, golden_path=golden_file + ) diff --git a/metadata-ingestion/tests/unit/test_source_helpers.py b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py similarity index 100% rename from metadata-ingestion/tests/unit/test_source_helpers.py rename to metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py From 57f855ecd11632e884b12fda0fc57e2694ee26a5 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Tue, 10 Oct 2023 12:18:21 +0530 Subject: [PATCH 23/98] feat(ingest): refactor + simplify incremental lineage helper (#8976) --- .../api/incremental_lineage_helper.py | 139 ++++++++++++++++++ .../datahub/ingestion/api/source_helpers.py | 138 +---------------- .../source/snowflake/snowflake_v2.py | 4 +- .../test_incremental_lineage_helper.py | 6 +- 4 files changed, 142 insertions(+), 145 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py diff --git a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py new file mode 100644 index 0000000000000..9478c5cf7efa2 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py @@ -0,0 +1,139 @@ +import copy +from typing import Dict, Iterable, Optional + +from datahub.emitter.mce_builder import datahub_guid, set_aspect +from datahub.emitter.mcp import 
MetadataChangeProposalWrapper +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.graph.client import DataHubGraph +from datahub.metadata.schema_classes import ( + FineGrainedLineageClass, + MetadataChangeEventClass, + SystemMetadataClass, + UpstreamClass, + UpstreamLineageClass, +) +from datahub.specific.dataset import DatasetPatchBuilder + + +def _convert_upstream_lineage_to_patch( + urn: str, + aspect: UpstreamLineageClass, + system_metadata: Optional[SystemMetadataClass], +) -> MetadataWorkUnit: + patch_builder = DatasetPatchBuilder(urn, system_metadata) + for upstream in aspect.upstreams: + patch_builder.add_upstream_lineage(upstream) + mcp = next(iter(patch_builder.build())) + return MetadataWorkUnit(id=f"{urn}-upstreamLineage", mcp_raw=mcp) + + +def get_fine_grained_lineage_key(fine_upstream: FineGrainedLineageClass) -> str: + return datahub_guid( + { + "upstreams": sorted(fine_upstream.upstreams or []), + "downstreams": sorted(fine_upstream.downstreams or []), + "transformOperation": fine_upstream.transformOperation, + } + ) + + +def _merge_upstream_lineage( + new_aspect: UpstreamLineageClass, gms_aspect: UpstreamLineageClass +) -> UpstreamLineageClass: + merged_aspect = copy.deepcopy(gms_aspect) + + upstreams_map: Dict[str, UpstreamClass] = { + upstream.dataset: upstream for upstream in merged_aspect.upstreams + } + + upstreams_updated = False + fine_upstreams_updated = False + + for table_upstream in new_aspect.upstreams: + if table_upstream.dataset not in upstreams_map or ( + table_upstream.auditStamp.time + > upstreams_map[table_upstream.dataset].auditStamp.time + ): + upstreams_map[table_upstream.dataset] = table_upstream + upstreams_updated = True + + if upstreams_updated: + merged_aspect.upstreams = list(upstreams_map.values()) + + if new_aspect.fineGrainedLineages and merged_aspect.fineGrainedLineages: + fine_upstreams_map: Dict[str, FineGrainedLineageClass] = { + get_fine_grained_lineage_key(fine_upstream): fine_upstream + for fine_upstream in merged_aspect.fineGrainedLineages + } + for column_upstream in new_aspect.fineGrainedLineages: + column_upstream_key = get_fine_grained_lineage_key(column_upstream) + + if column_upstream_key not in fine_upstreams_map or ( + column_upstream.confidenceScore + > fine_upstreams_map[column_upstream_key].confidenceScore + ): + fine_upstreams_map[column_upstream_key] = column_upstream + fine_upstreams_updated = True + + if fine_upstreams_updated: + merged_aspect.fineGrainedLineages = list(fine_upstreams_map.values()) + else: + merged_aspect.fineGrainedLineages = ( + new_aspect.fineGrainedLineages or gms_aspect.fineGrainedLineages + ) + + return merged_aspect + + +def _lineage_wu_via_read_modify_write( + graph: Optional[DataHubGraph], + urn: str, + aspect: UpstreamLineageClass, + system_metadata: Optional[SystemMetadataClass], +) -> MetadataWorkUnit: + if graph is None: + raise ValueError( + "Failed to handle incremental lineage, DataHubGraph is missing. " + "Use `datahub-rest` sink OR provide `datahub-api` config in recipe. 
" + ) + gms_aspect = graph.get_aspect(urn, UpstreamLineageClass) + if gms_aspect: + new_aspect = _merge_upstream_lineage(aspect, gms_aspect) + else: + new_aspect = aspect + + return MetadataChangeProposalWrapper( + entityUrn=urn, aspect=new_aspect, systemMetadata=system_metadata + ).as_workunit() + + +def auto_incremental_lineage( + graph: Optional[DataHubGraph], + incremental_lineage: bool, + stream: Iterable[MetadataWorkUnit], +) -> Iterable[MetadataWorkUnit]: + if not incremental_lineage: + yield from stream + return # early exit + + for wu in stream: + lineage_aspect: Optional[UpstreamLineageClass] = wu.get_aspect_of_type( + UpstreamLineageClass + ) + urn = wu.get_urn() + + if lineage_aspect: + if isinstance(wu.metadata, MetadataChangeEventClass): + set_aspect( + wu.metadata, None, UpstreamLineageClass + ) # we'll emit upstreamLineage separately below + if len(wu.metadata.proposedSnapshot.aspects) > 0: + yield wu + + yield _lineage_wu_via_read_modify_write( + graph, urn, lineage_aspect, wu.metadata.systemMetadata + ) if lineage_aspect.fineGrainedLineages else _convert_upstream_lineage_to_patch( + urn, lineage_aspect, wu.metadata.systemMetadata + ) + else: + yield wu diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index 42f970e97c95f..7fc15cf829678 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -1,4 +1,3 @@ -import copy import logging from datetime import datetime, timezone from typing import ( @@ -16,14 +15,9 @@ ) from datahub.configuration.time_window_config import BaseTimeWindowConfig -from datahub.emitter.mce_builder import ( - datahub_guid, - make_dataplatform_instance_urn, - set_aspect, -) +from datahub.emitter.mce_builder import make_dataplatform_instance_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( BrowsePathEntryClass, BrowsePathsClass, @@ -31,17 +25,12 @@ ChangeTypeClass, ContainerClass, DatasetUsageStatisticsClass, - FineGrainedLineageClass, MetadataChangeEventClass, MetadataChangeProposalClass, StatusClass, - SystemMetadataClass, TagKeyClass, TimeWindowSizeClass, - UpstreamClass, - UpstreamLineageClass, ) -from datahub.specific.dataset import DatasetPatchBuilder from datahub.telemetry import telemetry from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.tag_urn import TagUrn @@ -377,128 +366,3 @@ def _prepend_platform_instance( return [BrowsePathEntryClass(id=urn, urn=urn)] + entries return entries - - -def auto_incremental_lineage( - graph: Optional[DataHubGraph], - incremental_lineage: bool, - include_column_level_lineage: bool, - stream: Iterable[MetadataWorkUnit], -) -> Iterable[MetadataWorkUnit]: - if not incremental_lineage: - yield from stream - return # early exit - - for wu in stream: - lineage_aspect: Optional[UpstreamLineageClass] = wu.get_aspect_of_type( - UpstreamLineageClass - ) - urn = wu.get_urn() - - if lineage_aspect: - if isinstance(wu.metadata, MetadataChangeEventClass): - set_aspect( - wu.metadata, None, UpstreamLineageClass - ) # we'll emit upstreamLineage separately below - if len(wu.metadata.proposedSnapshot.aspects) > 0: - yield wu - - yield _lineage_wu_via_read_modify_write( - graph, urn, lineage_aspect, wu.metadata.systemMetadata - ) if 
include_column_level_lineage else _convert_upstream_lineage_to_patch( - urn, lineage_aspect, wu.metadata.systemMetadata - ) - else: - yield wu - - -def _convert_upstream_lineage_to_patch( - urn: str, - aspect: UpstreamLineageClass, - system_metadata: Optional[SystemMetadataClass], -) -> MetadataWorkUnit: - patch_builder = DatasetPatchBuilder(urn, system_metadata) - for upstream in aspect.upstreams: - patch_builder.add_upstream_lineage(upstream) - mcp = next(iter(patch_builder.build())) - return MetadataWorkUnit(id=f"{urn}-upstreamLineage", mcp_raw=mcp) - - -def _lineage_wu_via_read_modify_write( - graph: Optional[DataHubGraph], - urn: str, - aspect: UpstreamLineageClass, - system_metadata: Optional[SystemMetadataClass], -) -> MetadataWorkUnit: - if graph is None: - raise ValueError( - "Failed to handle incremental lineage, DataHubGraph is missing. " - "Use `datahub-rest` sink OR provide `datahub-api` config in recipe. " - ) - gms_aspect = graph.get_aspect(urn, UpstreamLineageClass) - if gms_aspect: - new_aspect = _merge_upstream_lineage(aspect, gms_aspect) - else: - new_aspect = aspect - - return MetadataChangeProposalWrapper( - entityUrn=urn, aspect=new_aspect, systemMetadata=system_metadata - ).as_workunit() - - -def _merge_upstream_lineage( - new_aspect: UpstreamLineageClass, gms_aspect: UpstreamLineageClass -) -> UpstreamLineageClass: - merged_aspect = copy.deepcopy(gms_aspect) - - upstreams_map: Dict[str, UpstreamClass] = { - upstream.dataset: upstream for upstream in merged_aspect.upstreams - } - - upstreams_updated = False - fine_upstreams_updated = False - - for table_upstream in new_aspect.upstreams: - if table_upstream.dataset not in upstreams_map or ( - table_upstream.auditStamp.time - > upstreams_map[table_upstream.dataset].auditStamp.time - ): - upstreams_map[table_upstream.dataset] = table_upstream - upstreams_updated = True - - if upstreams_updated: - merged_aspect.upstreams = list(upstreams_map.values()) - - if new_aspect.fineGrainedLineages and merged_aspect.fineGrainedLineages: - fine_upstreams_map: Dict[str, FineGrainedLineageClass] = { - get_fine_grained_lineage_key(fine_upstream): fine_upstream - for fine_upstream in merged_aspect.fineGrainedLineages - } - for column_upstream in new_aspect.fineGrainedLineages: - column_upstream_key = get_fine_grained_lineage_key(column_upstream) - - if column_upstream_key not in fine_upstreams_map or ( - column_upstream.confidenceScore - > fine_upstreams_map[column_upstream_key].confidenceScore - ): - fine_upstreams_map[column_upstream_key] = column_upstream - fine_upstreams_updated = True - - if fine_upstreams_updated: - merged_aspect.fineGrainedLineages = list(fine_upstreams_map.values()) - else: - merged_aspect.fineGrainedLineages = ( - new_aspect.fineGrainedLineages or gms_aspect.fineGrainedLineages - ) - - return merged_aspect - - -def get_fine_grained_lineage_key(fine_upstream: FineGrainedLineageClass) -> str: - return datahub_guid( - { - "upstreams": sorted(fine_upstream.upstreams or []), - "downstreams": sorted(fine_upstream.downstreams or []), - "transformOperation": fine_upstream.transformOperation, - } - ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index e0848b5f9ab34..a5c07d9a3870c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -27,6 +27,7 @@ platform_name, support_status, ) +from 
datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage from datahub.ingestion.api.source import ( CapabilityReport, MetadataWorkUnitProcessor, @@ -36,7 +37,6 @@ TestableSource, TestConnectionReport, ) -from datahub.ingestion.api.source_helpers import auto_incremental_lineage from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.glossary.classification_mixin import ClassificationHandler from datahub.ingestion.source.common.subtypes import ( @@ -517,8 +517,6 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: auto_incremental_lineage, self.ctx.graph, self.config.incremental_lineage, - self.config.include_column_lineage - or self.config.include_view_column_lineage, ), StaleEntityRemovalHandler.create( self, self.config, self.ctx diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py index 4078bda26c743..54a22d860285c 100644 --- a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py @@ -6,7 +6,7 @@ import datahub.metadata.schema_classes as models from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.api.source_helpers import auto_incremental_lineage +from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.sink.file import write_metadata_file from tests.test_helpers import mce_helpers @@ -88,7 +88,6 @@ def test_incremental_table_lineage(tmp_path, pytestconfig): processed_wus = auto_incremental_lineage( graph=None, incremental_lineage=True, - include_column_level_lineage=False, stream=[ MetadataChangeProposalWrapper( entityUrn=urn, aspect=aspect, systemMetadata=system_metadata @@ -146,7 +145,6 @@ def test_incremental_column_level_lineage( processed_wus = auto_incremental_lineage( graph=mock_graph, incremental_lineage=True, - include_column_level_lineage=True, stream=[ MetadataChangeProposalWrapper( entityUrn=dataset_urn, @@ -184,7 +182,6 @@ def test_incremental_column_lineage_less_upstreams_in_gms_aspect( processed_wus = auto_incremental_lineage( graph=mock_graph, incremental_lineage=True, - include_column_level_lineage=True, stream=[ MetadataChangeProposalWrapper( entityUrn=urn, aspect=aspect, systemMetadata=system_metadata @@ -227,7 +224,6 @@ def test_incremental_column_lineage_more_upstreams_in_gms_aspect( processed_wus = auto_incremental_lineage( graph=mock_graph, incremental_lineage=True, - include_column_level_lineage=True, stream=[ MetadataChangeProposalWrapper( entityUrn=urn, aspect=aspect, systemMetadata=system_metadata From bb39d5418fcbf8bebbae1b510c63a1170865a072 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 10 Oct 2023 16:08:34 +0530 Subject: [PATCH 24/98] fix(lint): run black, isort (#8978) --- .../tests/assertions/assertions_test.py | 33 ++-- smoke-test/tests/browse/browse_test.py | 51 +++++-- smoke-test/tests/cli/datahub-cli.py | 76 +++++++--- smoke-test/tests/cli/datahub_graph_test.py | 12 +- .../cli/delete_cmd/test_timeseries_delete.py | 12 +- .../ingest_cmd/test_timeseries_rollback.py | 6 +- .../cli/user_groups_cmd/test_group_cmd.py | 3 +- smoke-test/tests/conftest.py | 4 +- smoke-test/tests/consistency_utils.py | 16 +- 
.../tests/containers/containers_test.py | 4 +- smoke-test/tests/cypress/integration_test.py | 23 ++- .../tests/dataproduct/test_dataproduct.py | 4 +- smoke-test/tests/delete/delete_test.py | 18 +-- .../tests/deprecation/deprecation_test.py | 9 +- smoke-test/tests/domains/domains_test.py | 15 +- .../managed_ingestion_test.py | 3 +- smoke-test/tests/patch/common_patch_tests.py | 52 ++----- .../tests/patch/test_datajob_patches.py | 23 +-- .../tests/patch/test_dataset_patches.py | 18 ++- smoke-test/tests/policies/test_policies.py | 10 +- .../tests/setup/lineage/helper_classes.py | 5 +- .../setup/lineage/ingest_data_job_change.py | 42 ++---- .../lineage/ingest_dataset_join_change.py | 36 ++--- .../lineage/ingest_input_datasets_change.py | 42 ++---- .../setup/lineage/ingest_time_lineage.py | 18 ++- smoke-test/tests/setup/lineage/utils.py | 85 +++++------ .../tags-and-terms/tags_and_terms_test.py | 4 +- smoke-test/tests/telemetry/telemetry_test.py | 4 +- smoke-test/tests/test_result_msg.py | 23 ++- smoke-test/tests/test_stateful_ingestion.py | 14 +- smoke-test/tests/tests/tests_test.py | 7 +- smoke-test/tests/timeline/timeline_test.py | 67 +++++---- .../tokens/revokable_access_token_test.py | 12 +- smoke-test/tests/utils.py | 17 +-- smoke-test/tests/views/views_test.py | 142 +++++++++--------- 35 files changed, 457 insertions(+), 453 deletions(-) diff --git a/smoke-test/tests/assertions/assertions_test.py b/smoke-test/tests/assertions/assertions_test.py index 4aa64c512f684..48f3564e6cd97 100644 --- a/smoke-test/tests/assertions/assertions_test.py +++ b/smoke-test/tests/assertions/assertions_test.py @@ -2,28 +2,29 @@ import urllib import pytest -import requests_wrapper as requests import tenacity from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext, RecordEnvelope from datahub.ingestion.api.sink import NoopWriteCallback from datahub.ingestion.sink.file import FileSink, FileSinkConfig -from datahub.metadata.com.linkedin.pegasus2avro.assertion import AssertionStdAggregation -from datahub.metadata.schema_classes import ( - AssertionInfoClass, - AssertionResultClass, - AssertionResultTypeClass, - AssertionRunEventClass, - AssertionRunStatusClass, - AssertionStdOperatorClass, - AssertionTypeClass, - DatasetAssertionInfoClass, - DatasetAssertionScopeClass, - PartitionSpecClass, - PartitionTypeClass, -) -from tests.utils import delete_urns_from_file, get_gms_url, ingest_file_via_rest, wait_for_healthcheck_util, get_sleep_info +from datahub.metadata.com.linkedin.pegasus2avro.assertion import \ + AssertionStdAggregation +from datahub.metadata.schema_classes import (AssertionInfoClass, + AssertionResultClass, + AssertionResultTypeClass, + AssertionRunEventClass, + AssertionRunStatusClass, + AssertionStdOperatorClass, + AssertionTypeClass, + DatasetAssertionInfoClass, + DatasetAssertionScopeClass, + PartitionSpecClass, + PartitionTypeClass) + +import requests_wrapper as requests +from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info, + ingest_file_via_rest, wait_for_healthcheck_util) restli_default_headers = { "X-RestLi-Protocol-Version": "2.0.0", diff --git a/smoke-test/tests/browse/browse_test.py b/smoke-test/tests/browse/browse_test.py index b9d2143d13ec7..550f0062d5a39 100644 --- a/smoke-test/tests/browse/browse_test.py +++ b/smoke-test/tests/browse/browse_test.py @@ -1,9 +1,10 @@ import time import pytest -import requests_wrapper as 
requests -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest +import requests_wrapper as requests +from tests.utils import (delete_urns_from_file, get_frontend_url, + ingest_file_via_rest) TEST_DATASET_1_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-1,PROD)" TEST_DATASET_2_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-2,PROD)" @@ -51,7 +52,9 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): # /prod -- There should be one entity get_browse_paths_json = { "query": get_browse_paths_query, - "variables": {"input": { "type": "DATASET", "path": ["prod"], "start": 0, "count": 100 } }, + "variables": { + "input": {"type": "DATASET", "path": ["prod"], "start": 0, "count": 100} + }, } response = frontend_session.post( @@ -67,12 +70,19 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): browse = res_data["data"]["browse"] print(browse) - assert browse["entities"] == [{ "urn": TEST_DATASET_3_URN }] + assert browse["entities"] == [{"urn": TEST_DATASET_3_URN}] # /prod/kafka1 get_browse_paths_json = { "query": get_browse_paths_query, - "variables": {"input": { "type": "DATASET", "path": ["prod", "kafka1"], "start": 0, "count": 10 } }, + "variables": { + "input": { + "type": "DATASET", + "path": ["prod", "kafka1"], + "start": 0, + "count": 10, + } + }, } response = frontend_session.post( @@ -88,16 +98,27 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): browse = res_data["data"]["browse"] assert browse == { - "total": 3, - "entities": [{ "urn": TEST_DATASET_1_URN }, { "urn": TEST_DATASET_2_URN }, { "urn": TEST_DATASET_3_URN }], - "groups": [], - "metadata": { "path": ["prod", "kafka1"], "totalNumEntities": 0 } + "total": 3, + "entities": [ + {"urn": TEST_DATASET_1_URN}, + {"urn": TEST_DATASET_2_URN}, + {"urn": TEST_DATASET_3_URN}, + ], + "groups": [], + "metadata": {"path": ["prod", "kafka1"], "totalNumEntities": 0}, } # /prod/kafka2 get_browse_paths_json = { "query": get_browse_paths_query, - "variables": {"input": { "type": "DATASET", "path": ["prod", "kafka2"], "start": 0, "count": 10 } }, + "variables": { + "input": { + "type": "DATASET", + "path": ["prod", "kafka2"], + "start": 0, + "count": 10, + } + }, } response = frontend_session.post( @@ -113,10 +134,8 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): browse = res_data["data"]["browse"] assert browse == { - "total": 2, - "entities": [{ "urn": TEST_DATASET_1_URN }, { "urn": TEST_DATASET_2_URN }], - "groups": [], - "metadata": { "path": ["prod", "kafka2"], "totalNumEntities": 0 } + "total": 2, + "entities": [{"urn": TEST_DATASET_1_URN}, {"urn": TEST_DATASET_2_URN}], + "groups": [], + "metadata": {"path": ["prod", "kafka2"], "totalNumEntities": 0}, } - - diff --git a/smoke-test/tests/cli/datahub-cli.py b/smoke-test/tests/cli/datahub-cli.py index 1d0080bdd9d48..c3db6028efceb 100644 --- a/smoke-test/tests/cli/datahub-cli.py +++ b/smoke-test/tests/cli/datahub-cli.py @@ -1,8 +1,11 @@ import json -import pytest from time import sleep -from datahub.cli.cli_utils import guess_entity_type, post_entity, get_aspects_for_entity + +import pytest +from datahub.cli.cli_utils import (get_aspects_for_entity, guess_entity_type, + post_entity) from datahub.cli.ingest_cli import get_session_and_host, rollback + from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync ingested_dataset_run_id = "" @@ -24,24 +27,46 @@ def test_setup(): session, gms_host = get_session_and_host() - assert "browsePaths" not in 
get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) - assert "editableDatasetProperties" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + assert "browsePaths" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + ) + assert "editableDatasetProperties" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False + ) - ingested_dataset_run_id = ingest_file_via_rest("tests/cli/cli_test_data.json").config.run_id + ingested_dataset_run_id = ingest_file_via_rest( + "tests/cli/cli_test_data.json" + ).config.run_id print("Setup ingestion id: " + ingested_dataset_run_id) - assert "browsePaths" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) + assert "browsePaths" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + ) yield # Clean up rollback_url = f"{gms_host}/runs?action=rollback" - session.post(rollback_url, data=json.dumps({"runId": ingested_editable_run_id, "dryRun": False, "hardDelete": True})) - session.post(rollback_url, data=json.dumps({"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": True})) + session.post( + rollback_url, + data=json.dumps( + {"runId": ingested_editable_run_id, "dryRun": False, "hardDelete": True} + ), + ) + session.post( + rollback_url, + data=json.dumps( + {"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": True} + ), + ) - assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) - assert "editableDatasetProperties" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + assert "browsePaths" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + ) + assert "editableDatasetProperties" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False + ) @pytest.mark.dependency() @@ -49,9 +74,7 @@ def test_rollback_editable(): global ingested_dataset_run_id global ingested_editable_run_id platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-rollback" - ) + dataset_name = "test-rollback" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -59,23 +82,38 @@ def test_rollback_editable(): print("Ingested dataset id:", ingested_dataset_run_id) # Assert that second data ingestion worked - assert "browsePaths" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) + assert "browsePaths" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + ) # Make editable change - ingested_editable_run_id = ingest_file_via_rest("tests/cli/cli_editable_test_data.json").config.run_id + ingested_editable_run_id = ingest_file_via_rest( + "tests/cli/cli_editable_test_data.json" + ).config.run_id print("ingested editable id:", ingested_editable_run_id) # Assert that second data ingestion worked - assert "editableDatasetProperties" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + assert "editableDatasetProperties" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False + ) # rollback ingestion 1 rollback_url = f"{gms_host}/runs?action=rollback" - session.post(rollback_url, data=json.dumps({"runId": 
ingested_dataset_run_id, "dryRun": False, "hardDelete": False})) + session.post( + rollback_url, + data=json.dumps( + {"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": False} + ), + ) # Allow async MCP processor to handle ingestions & rollbacks wait_for_writes_to_sync() # EditableDatasetProperties should still be part of the entity that was soft deleted. - assert "editableDatasetProperties" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + assert "editableDatasetProperties" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False + ) # But first ingestion aspects should not be present - assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn, typed=False) + assert "browsePaths" not in get_aspects_for_entity( + entity_urn=dataset_urn, typed=False + ) diff --git a/smoke-test/tests/cli/datahub_graph_test.py b/smoke-test/tests/cli/datahub_graph_test.py index 16925d26f6983..17c8924fb0998 100644 --- a/smoke-test/tests/cli/datahub_graph_test.py +++ b/smoke-test/tests/cli/datahub_graph_test.py @@ -1,13 +1,11 @@ import pytest import tenacity from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph -from datahub.metadata.schema_classes import KafkaSchemaClass, SchemaMetadataClass -from tests.utils import ( - delete_urns_from_file, - get_gms_url, - get_sleep_info, - ingest_file_via_rest, -) +from datahub.metadata.schema_classes import (KafkaSchemaClass, + SchemaMetadataClass) + +from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info, + ingest_file_via_rest) sleep_sec, sleep_times = get_sleep_info() diff --git a/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py b/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py index 4288a61b7a0c1..106da7cd8d71e 100644 --- a/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py +++ b/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py @@ -1,21 +1,22 @@ import json import logging +import sys import tempfile import time -import sys from json import JSONDecodeError from typing import Any, Dict, List, Optional -from click.testing import CliRunner, Result - import datahub.emitter.mce_builder as builder +from click.testing import CliRunner, Result from datahub.emitter.serialization_helper import pre_json_transform from datahub.entrypoints import datahub from datahub.metadata.schema_classes import DatasetProfileClass + +import requests_wrapper as requests from tests.aspect_generators.timeseries.dataset_profile_gen import \ gen_dataset_profiles -from tests.utils import get_strftime_from_timestamp_millis, wait_for_writes_to_sync -import requests_wrapper as requests +from tests.utils import (get_strftime_from_timestamp_millis, + wait_for_writes_to_sync) logger = logging.getLogger(__name__) @@ -33,6 +34,7 @@ def sync_elastic() -> None: wait_for_writes_to_sync() + def datahub_put_profile(dataset_profile: DatasetProfileClass) -> None: with tempfile.NamedTemporaryFile("w+t", suffix=".json") as aspect_file: aspect_text: str = json.dumps(pre_json_transform(dataset_profile.to_obj())) diff --git a/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py b/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py index 61e7a5a65b494..e962b1a5cafd6 100644 --- a/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py +++ b/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py @@ -2,14 +2,14 @@ import time from typing import Any, Dict, List, Optional -from click.testing 
import CliRunner, Result - import datahub.emitter.mce_builder as builder +from click.testing import CliRunner, Result from datahub.emitter.serialization_helper import post_json_transform from datahub.entrypoints import datahub from datahub.metadata.schema_classes import DatasetProfileClass -from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync + import requests_wrapper as requests +from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync runner = CliRunner(mix_stderr=False) diff --git a/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py b/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py index 405e061c016f9..7b986d3be0444 100644 --- a/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py +++ b/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py @@ -1,6 +1,7 @@ import json import sys import tempfile +import time from typing import Any, Dict, Iterable, List import yaml @@ -8,7 +9,7 @@ from datahub.api.entities.corpgroup.corpgroup import CorpGroup from datahub.entrypoints import datahub from datahub.ingestion.graph.client import DataHubGraph, get_default_graph -import time + import requests_wrapper as requests from tests.utils import wait_for_writes_to_sync diff --git a/smoke-test/tests/conftest.py b/smoke-test/tests/conftest.py index eed7a983197ef..57b92a2db1c19 100644 --- a/smoke-test/tests/conftest.py +++ b/smoke-test/tests/conftest.py @@ -2,8 +2,8 @@ import pytest -from tests.utils import wait_for_healthcheck_util, get_frontend_session from tests.test_result_msg import send_message +from tests.utils import get_frontend_session, wait_for_healthcheck_util # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" @@ -28,5 +28,5 @@ def test_healthchecks(wait_for_healthchecks): def pytest_sessionfinish(session, exitstatus): - """ whole test run finishes. """ + """whole test run finishes.""" send_message(exitstatus) diff --git a/smoke-test/tests/consistency_utils.py b/smoke-test/tests/consistency_utils.py index 15993733c592b..607835bf3649c 100644 --- a/smoke-test/tests/consistency_utils.py +++ b/smoke-test/tests/consistency_utils.py @@ -1,10 +1,16 @@ -import time +import logging import os import subprocess +import time _ELASTIC_BUFFER_WRITES_TIME_IN_SEC: int = 1 USE_STATIC_SLEEP: bool = bool(os.getenv("USE_STATIC_SLEEP", False)) -ELASTICSEARCH_REFRESH_INTERVAL_SECONDS: int = int(os.getenv("ELASTICSEARCH_REFRESH_INTERVAL_SECONDS", 5)) +ELASTICSEARCH_REFRESH_INTERVAL_SECONDS: int = int( + os.getenv("ELASTICSEARCH_REFRESH_INTERVAL_SECONDS", 5) +) + +logger = logging.getLogger(__name__) + def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: if USE_STATIC_SLEEP: @@ -30,7 +36,9 @@ def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: lag_zero = True if not lag_zero: - logger.warning(f"Exiting early from waiting for elastic to catch up due to a timeout. Current lag is {lag_values}") + logger.warning( + f"Exiting early from waiting for elastic to catch up due to a timeout. 
Current lag is {lag_values}" + ) else: # we want to sleep for an additional period of time for Elastic writes buffer to clear - time.sleep(_ELASTIC_BUFFER_WRITES_TIME_IN_SEC) \ No newline at end of file + time.sleep(_ELASTIC_BUFFER_WRITES_TIME_IN_SEC) diff --git a/smoke-test/tests/containers/containers_test.py b/smoke-test/tests/containers/containers_test.py index 575e3def6cf23..05a45239dabf8 100644 --- a/smoke-test/tests/containers/containers_test.py +++ b/smoke-test/tests/containers/containers_test.py @@ -1,5 +1,7 @@ import pytest -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest + +from tests.utils import (delete_urns_from_file, get_frontend_url, + ingest_file_via_rest) @pytest.fixture(scope="module", autouse=False) diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py index b3bacf39ac7ae..4ad2bc53fa87d 100644 --- a/smoke-test/tests/cypress/integration_test.py +++ b/smoke-test/tests/cypress/integration_test.py @@ -1,18 +1,16 @@ -from typing import Set, List - import datetime -import pytest -import subprocess import os +import subprocess +from typing import List, Set + +import pytest + +from tests.setup.lineage.ingest_time_lineage import (get_time_lineage_urns, + ingest_time_lineage) +from tests.utils import (create_datahub_step_state_aspects, delete_urns, + delete_urns_from_file, get_admin_username, + ingest_file_via_rest) -from tests.utils import ( - create_datahub_step_state_aspects, - get_admin_username, - ingest_file_via_rest, - delete_urns_from_file, - delete_urns, -) -from tests.setup.lineage.ingest_time_lineage import ingest_time_lineage, get_time_lineage_urns CYPRESS_TEST_DATA_DIR = "tests/cypress" TEST_DATA_FILENAME = "data.json" @@ -145,7 +143,6 @@ def ingest_cleanup_data(): delete_urns_from_file(f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}") delete_urns(get_time_lineage_urns()) - print_now() print("deleting onboarding data file") if os.path.exists(f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}"): diff --git a/smoke-test/tests/dataproduct/test_dataproduct.py b/smoke-test/tests/dataproduct/test_dataproduct.py index db198098f21fa..baef1cb1cb3ba 100644 --- a/smoke-test/tests/dataproduct/test_dataproduct.py +++ b/smoke-test/tests/dataproduct/test_dataproduct.py @@ -1,4 +1,6 @@ +import logging import os +import subprocess import tempfile import time from random import randint @@ -17,8 +19,6 @@ DomainPropertiesClass, DomainsClass) from datahub.utilities.urns.urn import Urn -import subprocess -import logging logger = logging.getLogger(__name__) diff --git a/smoke-test/tests/delete/delete_test.py b/smoke-test/tests/delete/delete_test.py index 68e001f983fbf..d920faaf3a89a 100644 --- a/smoke-test/tests/delete/delete_test.py +++ b/smoke-test/tests/delete/delete_test.py @@ -1,16 +1,14 @@ -import os import json -import pytest +import os from time import sleep + +import pytest from datahub.cli.cli_utils import get_aspects_for_entity from datahub.cli.ingest_cli import get_session_and_host -from tests.utils import ( - ingest_file_via_rest, - wait_for_healthcheck_util, - delete_urns_from_file, - wait_for_writes_to_sync, - get_datahub_graph, -) + +from tests.utils import (delete_urns_from_file, get_datahub_graph, + ingest_file_via_rest, wait_for_healthcheck_util, + wait_for_writes_to_sync) # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" @@ -102,7 +100,7 @@ def test_delete_reference(test_setup, depends=["test_healthchecks"]): 
graph.delete_references_to_urn(tag_urn, dry_run=False) wait_for_writes_to_sync() - + # Validate that references no longer exist references_count, related_aspects = graph.delete_references_to_urn( tag_urn, dry_run=True diff --git a/smoke-test/tests/deprecation/deprecation_test.py b/smoke-test/tests/deprecation/deprecation_test.py index 1149a970aa8e5..a8969804d03d7 100644 --- a/smoke-test/tests/deprecation/deprecation_test.py +++ b/smoke-test/tests/deprecation/deprecation_test.py @@ -1,10 +1,7 @@ import pytest -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - ingest_file_via_rest, - get_root_urn, -) + +from tests.utils import (delete_urns_from_file, get_frontend_url, get_root_urn, + ingest_file_via_rest) @pytest.fixture(scope="module", autouse=True) diff --git a/smoke-test/tests/domains/domains_test.py b/smoke-test/tests/domains/domains_test.py index 7ffe1682cafd8..fa8c918e3cbe1 100644 --- a/smoke-test/tests/domains/domains_test.py +++ b/smoke-test/tests/domains/domains_test.py @@ -1,12 +1,8 @@ import pytest import tenacity -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - get_gms_url, - ingest_file_via_rest, - get_sleep_info, -) + +from tests.utils import (delete_urns_from_file, get_frontend_url, get_gms_url, + get_sleep_info, ingest_file_via_rest) sleep_sec, sleep_times = get_sleep_info() @@ -240,4 +236,7 @@ def test_set_unset_domain(frontend_session, ingest_cleanup_data): assert res_data assert res_data["data"]["dataset"]["domain"]["domain"]["urn"] == domain_urn - assert res_data["data"]["dataset"]["domain"]["domain"]["properties"]["name"] == "Engineering" + assert ( + res_data["data"]["dataset"]["domain"]["domain"]["properties"]["name"] + == "Engineering" + ) diff --git a/smoke-test/tests/managed-ingestion/managed_ingestion_test.py b/smoke-test/tests/managed-ingestion/managed_ingestion_test.py index 1238a1dd5730a..b5e408731334e 100644 --- a/smoke-test/tests/managed-ingestion/managed_ingestion_test.py +++ b/smoke-test/tests/managed-ingestion/managed_ingestion_test.py @@ -3,7 +3,8 @@ import pytest import tenacity -from tests.utils import get_frontend_url, get_sleep_info, wait_for_healthcheck_util +from tests.utils import (get_frontend_url, get_sleep_info, + wait_for_healthcheck_util) sleep_sec, sleep_times = get_sleep_info() diff --git a/smoke-test/tests/patch/common_patch_tests.py b/smoke-test/tests/patch/common_patch_tests.py index 574e4fd4e4c88..f1d6abf5da794 100644 --- a/smoke-test/tests/patch/common_patch_tests.py +++ b/smoke-test/tests/patch/common_patch_tests.py @@ -2,25 +2,17 @@ import uuid from typing import Dict, Optional, Type -from datahub.emitter.mce_builder import ( - make_tag_urn, - make_term_urn, - make_user_urn, -) +from datahub.emitter.mce_builder import (make_tag_urn, make_term_urn, + make_user_urn) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_patch_builder import MetadataPatchProposal from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig -from datahub.metadata.schema_classes import ( - AuditStampClass, - GlobalTagsClass, - GlossaryTermAssociationClass, - GlossaryTermsClass, - OwnerClass, - OwnershipClass, - OwnershipTypeClass, - TagAssociationClass, - _Aspect, -) +from datahub.metadata.schema_classes import (AuditStampClass, GlobalTagsClass, + GlossaryTermAssociationClass, + GlossaryTermsClass, OwnerClass, + OwnershipClass, + OwnershipTypeClass, + TagAssociationClass, _Aspect) def helper_test_entity_terms_patch( @@ -34,18 +26,14 @@ def get_terms(graph, 
entity_urn): term_urn = make_term_urn(term=f"testTerm-{uuid.uuid4()}") - term_association = GlossaryTermAssociationClass( - urn=term_urn, context="test" - ) + term_association = GlossaryTermAssociationClass(urn=term_urn, context="test") global_terms = GlossaryTermsClass( terms=[term_association], auditStamp=AuditStampClass( time=int(time.time() * 1000.0), actor=make_user_urn("tester") ), ) - mcpw = MetadataChangeProposalWrapper( - entityUrn=test_entity_urn, aspect=global_terms - ) + mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=global_terms) with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit_mcp(mcpw) @@ -88,9 +76,7 @@ def helper_test_dataset_tags_patch( tag_association = TagAssociationClass(tag=tag_urn, context="test") global_tags = GlobalTagsClass(tags=[tag_association]) - mcpw = MetadataChangeProposalWrapper( - entityUrn=test_entity_urn, aspect=global_tags - ) + mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=global_tags) with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit_mcp(mcpw) @@ -153,15 +139,11 @@ def helper_test_ownership_patch( assert owner.owners[0].owner == make_user_urn("jdoe") for patch_mcp in ( - patch_builder_class(test_entity_urn) - .add_owner(owner_to_add) - .build() + patch_builder_class(test_entity_urn).add_owner(owner_to_add).build() ): graph.emit_mcp(patch_mcp) - owner = graph.get_aspect( - entity_urn=test_entity_urn, aspect_type=OwnershipClass - ) + owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) assert len(owner.owners) == 2 for patch_mcp in ( @@ -171,9 +153,7 @@ def helper_test_ownership_patch( ): graph.emit_mcp(patch_mcp) - owner = graph.get_aspect( - entity_urn=test_entity_urn, aspect_type=OwnershipClass - ) + owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) assert len(owner.owners) == 1 assert owner.owners[0].owner == make_user_urn("jdoe") @@ -199,9 +179,7 @@ def get_custom_properties( orig_aspect = base_aspect assert hasattr(orig_aspect, "customProperties") orig_aspect.customProperties = base_property_map - mcpw = MetadataChangeProposalWrapper( - entityUrn=test_entity_urn, aspect=orig_aspect - ) + mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=orig_aspect) with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit(mcpw) diff --git a/smoke-test/tests/patch/test_datajob_patches.py b/smoke-test/tests/patch/test_datajob_patches.py index 407410ee89914..342d5d683228a 100644 --- a/smoke-test/tests/patch/test_datajob_patches.py +++ b/smoke-test/tests/patch/test_datajob_patches.py @@ -3,19 +3,14 @@ from datahub.emitter.mce_builder import make_data_job_urn, make_dataset_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig -from datahub.metadata.schema_classes import ( - DataJobInfoClass, - DataJobInputOutputClass, - EdgeClass, -) +from datahub.metadata.schema_classes import (DataJobInfoClass, + DataJobInputOutputClass, + EdgeClass) from datahub.specific.datajob import DataJobPatchBuilder from tests.patch.common_patch_tests import ( - helper_test_custom_properties_patch, - helper_test_dataset_tags_patch, - helper_test_entity_terms_patch, - helper_test_ownership_patch, -) + helper_test_custom_properties_patch, helper_test_dataset_tags_patch, + helper_test_entity_terms_patch, helper_test_ownership_patch) def _make_test_datajob_urn( @@ -37,16 +32,12 @@ def test_datajob_ownership_patch(wait_for_healthchecks): # Tags def 
test_datajob_tags_patch(wait_for_healthchecks): - helper_test_dataset_tags_patch( - _make_test_datajob_urn(), DataJobPatchBuilder - ) + helper_test_dataset_tags_patch(_make_test_datajob_urn(), DataJobPatchBuilder) # Terms def test_dataset_terms_patch(wait_for_healthchecks): - helper_test_entity_terms_patch( - _make_test_datajob_urn(), DataJobPatchBuilder - ) + helper_test_entity_terms_patch(_make_test_datajob_urn(), DataJobPatchBuilder) # Custom Properties diff --git a/smoke-test/tests/patch/test_dataset_patches.py b/smoke-test/tests/patch/test_dataset_patches.py index 239aab64675d8..6704d19760fb9 100644 --- a/smoke-test/tests/patch/test_dataset_patches.py +++ b/smoke-test/tests/patch/test_dataset_patches.py @@ -20,7 +20,10 @@ UpstreamClass, UpstreamLineageClass) from datahub.specific.dataset import DatasetPatchBuilder -from tests.patch.common_patch_tests import helper_test_entity_terms_patch, helper_test_dataset_tags_patch, helper_test_ownership_patch, helper_test_custom_properties_patch + +from tests.patch.common_patch_tests import ( + helper_test_custom_properties_patch, helper_test_dataset_tags_patch, + helper_test_entity_terms_patch, helper_test_ownership_patch) # Common Aspect Patch Tests @@ -31,6 +34,7 @@ def test_dataset_ownership_patch(wait_for_healthchecks): ) helper_test_ownership_patch(dataset_urn, DatasetPatchBuilder) + # Tags def test_dataset_tags_patch(wait_for_healthchecks): dataset_urn = make_dataset_urn( @@ -38,6 +42,7 @@ def test_dataset_tags_patch(wait_for_healthchecks): ) helper_test_dataset_tags_patch(dataset_urn, DatasetPatchBuilder) + # Terms def test_dataset_terms_patch(wait_for_healthchecks): dataset_urn = make_dataset_urn( @@ -284,8 +289,15 @@ def test_custom_properties_patch(wait_for_healthchecks): dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) - orig_dataset_properties = DatasetPropertiesClass(name="test_name", description="test_description") - helper_test_custom_properties_patch(test_entity_urn=dataset_urn, patch_builder_class=DatasetPatchBuilder, custom_properties_aspect_class=DatasetPropertiesClass, base_aspect=orig_dataset_properties) + orig_dataset_properties = DatasetPropertiesClass( + name="test_name", description="test_description" + ) + helper_test_custom_properties_patch( + test_entity_urn=dataset_urn, + patch_builder_class=DatasetPatchBuilder, + custom_properties_aspect_class=DatasetPropertiesClass, + base_aspect=orig_dataset_properties, + ) with DataHubGraph(DataHubGraphConfig()) as graph: # Patch custom properties along with name diff --git a/smoke-test/tests/policies/test_policies.py b/smoke-test/tests/policies/test_policies.py index b7091541894dd..67142181d2b96 100644 --- a/smoke-test/tests/policies/test_policies.py +++ b/smoke-test/tests/policies/test_policies.py @@ -1,12 +1,8 @@ import pytest import tenacity -from tests.utils import ( - get_frontend_url, - wait_for_healthcheck_util, - get_frontend_session, - get_sleep_info, - get_root_urn, -) + +from tests.utils import (get_frontend_session, get_frontend_url, get_root_urn, + get_sleep_info, wait_for_healthcheck_util) TEST_POLICY_NAME = "Updated Platform Policy" diff --git a/smoke-test/tests/setup/lineage/helper_classes.py b/smoke-test/tests/setup/lineage/helper_classes.py index 53f77b08d15ed..d550f3093be85 100644 --- a/smoke-test/tests/setup/lineage/helper_classes.py +++ b/smoke-test/tests/setup/lineage/helper_classes.py @@ -1,10 +1,7 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional -from 
datahub.metadata.schema_classes import ( - EdgeClass, - SchemaFieldDataTypeClass, -) +from datahub.metadata.schema_classes import EdgeClass, SchemaFieldDataTypeClass @dataclass diff --git a/smoke-test/tests/setup/lineage/ingest_data_job_change.py b/smoke-test/tests/setup/lineage/ingest_data_job_change.py index 8e3e9c5352922..588a1625419bc 100644 --- a/smoke-test/tests/setup/lineage/ingest_data_job_change.py +++ b/smoke-test/tests/setup/lineage/ingest_data_job_change.py @@ -1,36 +1,20 @@ from typing import List -from datahub.emitter.mce_builder import ( - make_dataset_urn, - make_data_flow_urn, - make_data_job_urn_with_flow, -) +from datahub.emitter.mce_builder import (make_data_flow_urn, + make_data_job_urn_with_flow, + make_dataset_urn) from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import ( - DateTypeClass, - NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass, -) +from datahub.metadata.schema_classes import (DateTypeClass, NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass) -from tests.setup.lineage.constants import ( - AIRFLOW_DATA_PLATFORM, - SNOWFLAKE_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO, -) -from tests.setup.lineage.helper_classes import ( - Field, - Dataset, - Task, - Pipeline, -) -from tests.setup.lineage.utils import ( - create_edge, - create_node, - create_nodes_and_edges, - emit_mcps, -) +from tests.setup.lineage.constants import (AIRFLOW_DATA_PLATFORM, + SNOWFLAKE_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.helper_classes import Dataset, Field, Pipeline, Task +from tests.setup.lineage.utils import (create_edge, create_node, + create_nodes_and_edges, emit_mcps) # Constants for Case 2 DAILY_TEMPERATURE_DATASET_ID = "climate.daily_temperature" diff --git a/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py b/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py index 35a8e6d5cf02e..bb9f51b6b5e9b 100644 --- a/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py +++ b/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py @@ -1,32 +1,18 @@ from typing import List -from datahub.emitter.mce_builder import ( - make_dataset_urn, -) +from datahub.emitter.mce_builder import make_dataset_urn from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import ( - NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass, - UpstreamClass, -) +from datahub.metadata.schema_classes import (NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass, UpstreamClass) -from tests.setup.lineage.constants import ( - DATASET_ENTITY_TYPE, - SNOWFLAKE_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO, -) -from tests.setup.lineage.helper_classes import ( - Field, - Dataset, -) -from tests.setup.lineage.utils import ( - create_node, - create_upstream_edge, - create_upstream_mcp, - emit_mcps, -) +from tests.setup.lineage.constants import (DATASET_ENTITY_TYPE, + SNOWFLAKE_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.helper_classes import Dataset, Field +from tests.setup.lineage.utils import (create_node, create_upstream_edge, + create_upstream_mcp, emit_mcps) # Constants for Case 3 GDP_DATASET_ID = "economic_data.gdp" diff --git a/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py b/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py 
index f4fb795147478..6079d7a3d2b63 100644 --- a/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py +++ b/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py @@ -1,36 +1,20 @@ from typing import List -from datahub.emitter.mce_builder import ( - make_dataset_urn, - make_data_flow_urn, - make_data_job_urn_with_flow, -) +from datahub.emitter.mce_builder import (make_data_flow_urn, + make_data_job_urn_with_flow, + make_dataset_urn) from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import ( - NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass, -) - -from tests.setup.lineage.constants import ( - AIRFLOW_DATA_PLATFORM, - BQ_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO, -) -from tests.setup.lineage.helper_classes import ( - Field, - Dataset, - Task, - Pipeline, -) -from tests.setup.lineage.utils import ( - create_edge, - create_node, - create_nodes_and_edges, - emit_mcps, -) +from datahub.metadata.schema_classes import (NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass) +from tests.setup.lineage.constants import (AIRFLOW_DATA_PLATFORM, + BQ_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.helper_classes import Dataset, Field, Pipeline, Task +from tests.setup.lineage.utils import (create_edge, create_node, + create_nodes_and_edges, emit_mcps) # Constants for Case 1 TRANSACTIONS_DATASET_ID = "transactions.transactions" diff --git a/smoke-test/tests/setup/lineage/ingest_time_lineage.py b/smoke-test/tests/setup/lineage/ingest_time_lineage.py index cae8e0124d501..3aec979707290 100644 --- a/smoke-test/tests/setup/lineage/ingest_time_lineage.py +++ b/smoke-test/tests/setup/lineage/ingest_time_lineage.py @@ -1,12 +1,14 @@ +import os from typing import List from datahub.emitter.rest_emitter import DatahubRestEmitter -from tests.setup.lineage.ingest_input_datasets_change import ingest_input_datasets_change, get_input_datasets_change_urns -from tests.setup.lineage.ingest_data_job_change import ingest_data_job_change, get_data_job_change_urns -from tests.setup.lineage.ingest_dataset_join_change import ingest_dataset_join_change, get_dataset_join_change_urns - -import os +from tests.setup.lineage.ingest_data_job_change import ( + get_data_job_change_urns, ingest_data_job_change) +from tests.setup.lineage.ingest_dataset_join_change import ( + get_dataset_join_change_urns, ingest_dataset_join_change) +from tests.setup.lineage.ingest_input_datasets_change import ( + get_input_datasets_change_urns, ingest_input_datasets_change) SERVER = os.getenv("DATAHUB_SERVER") or "http://localhost:8080" TOKEN = os.getenv("DATAHUB_TOKEN") or "" @@ -20,4 +22,8 @@ def ingest_time_lineage() -> None: def get_time_lineage_urns() -> List[str]: - return get_input_datasets_change_urns() + get_data_job_change_urns() + get_dataset_join_change_urns() + return ( + get_input_datasets_change_urns() + + get_data_job_change_urns() + + get_dataset_join_change_urns() + ) diff --git a/smoke-test/tests/setup/lineage/utils.py b/smoke-test/tests/setup/lineage/utils.py index 672f7a945a6af..c72f6ccb89b7a 100644 --- a/smoke-test/tests/setup/lineage/utils.py +++ b/smoke-test/tests/setup/lineage/utils.py @@ -1,41 +1,30 @@ import datetime -from datahub.emitter.mce_builder import ( - make_data_platform_urn, - make_dataset_urn, - make_data_job_urn_with_flow, - make_data_flow_urn, -) +from typing import List + +from datahub.emitter.mce_builder import (make_data_flow_urn, 
+ make_data_job_urn_with_flow, + make_data_platform_urn, + make_dataset_urn) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage -from datahub.metadata.schema_classes import ( - AuditStampClass, - ChangeTypeClass, - DatasetLineageTypeClass, - DatasetPropertiesClass, - DataFlowInfoClass, - DataJobInputOutputClass, - DataJobInfoClass, - EdgeClass, - MySqlDDLClass, - SchemaFieldClass, - SchemaMetadataClass, - UpstreamClass, -) -from typing import List - -from tests.setup.lineage.constants import ( - DATASET_ENTITY_TYPE, - DATA_JOB_ENTITY_TYPE, - DATA_FLOW_ENTITY_TYPE, - DATA_FLOW_INFO_ASPECT_NAME, - DATA_JOB_INFO_ASPECT_NAME, - DATA_JOB_INPUT_OUTPUT_ASPECT_NAME, -) -from tests.setup.lineage.helper_classes import ( - Dataset, - Pipeline, -) +from datahub.metadata.schema_classes import (AuditStampClass, ChangeTypeClass, + DataFlowInfoClass, + DataJobInfoClass, + DataJobInputOutputClass, + DatasetLineageTypeClass, + DatasetPropertiesClass, EdgeClass, + MySqlDDLClass, SchemaFieldClass, + SchemaMetadataClass, + UpstreamClass) + +from tests.setup.lineage.constants import (DATA_FLOW_ENTITY_TYPE, + DATA_FLOW_INFO_ASPECT_NAME, + DATA_JOB_ENTITY_TYPE, + DATA_JOB_INFO_ASPECT_NAME, + DATA_JOB_INPUT_OUTPUT_ASPECT_NAME, + DATASET_ENTITY_TYPE) +from tests.setup.lineage.helper_classes import Dataset, Pipeline def create_node(dataset: Dataset) -> List[MetadataChangeProposalWrapper]: @@ -85,10 +74,10 @@ def create_node(dataset: Dataset) -> List[MetadataChangeProposalWrapper]: def create_edge( - source_urn: str, - destination_urn: str, - created_timestamp_millis: int, - updated_timestamp_millis: int, + source_urn: str, + destination_urn: str, + created_timestamp_millis: int, + updated_timestamp_millis: int, ) -> EdgeClass: created_audit_stamp: AuditStampClass = AuditStampClass( time=created_timestamp_millis, actor="urn:li:corpuser:unknown" @@ -105,7 +94,7 @@ def create_edge( def create_nodes_and_edges( - airflow_dag: Pipeline, + airflow_dag: Pipeline, ) -> List[MetadataChangeProposalWrapper]: mcps = [] data_flow_urn = make_data_flow_urn( @@ -160,9 +149,9 @@ def create_nodes_and_edges( def create_upstream_edge( - upstream_entity_urn: str, - created_timestamp_millis: int, - updated_timestamp_millis: int, + upstream_entity_urn: str, + created_timestamp_millis: int, + updated_timestamp_millis: int, ): created_audit_stamp: AuditStampClass = AuditStampClass( time=created_timestamp_millis, actor="urn:li:corpuser:unknown" @@ -180,11 +169,11 @@ def create_upstream_edge( def create_upstream_mcp( - entity_type: str, - entity_urn: str, - upstreams: List[UpstreamClass], - timestamp_millis: int, - run_id: str = "", + entity_type: str, + entity_urn: str, + upstreams: List[UpstreamClass], + timestamp_millis: int, + run_id: str = "", ) -> MetadataChangeProposalWrapper: print(f"Creating upstreamLineage aspect for {entity_urn}") timestamp_millis: int = int(datetime.datetime.now().timestamp() * 1000) @@ -203,7 +192,7 @@ def create_upstream_mcp( def emit_mcps( - emitter: DatahubRestEmitter, mcps: List[MetadataChangeProposalWrapper] + emitter: DatahubRestEmitter, mcps: List[MetadataChangeProposalWrapper] ) -> None: for mcp in mcps: emitter.emit_mcp(mcp) diff --git a/smoke-test/tests/tags-and-terms/tags_and_terms_test.py b/smoke-test/tests/tags-and-terms/tags_and_terms_test.py index b0ca29b544cfe..6ac75765286f0 100644 --- a/smoke-test/tests/tags-and-terms/tags_and_terms_test.py +++ 
b/smoke-test/tests/tags-and-terms/tags_and_terms_test.py @@ -1,5 +1,7 @@ import pytest -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest, wait_for_healthcheck_util + +from tests.utils import (delete_urns_from_file, get_frontend_url, + ingest_file_via_rest, wait_for_healthcheck_util) @pytest.fixture(scope="module", autouse=True) diff --git a/smoke-test/tests/telemetry/telemetry_test.py b/smoke-test/tests/telemetry/telemetry_test.py index 3672abcda948d..3127061c9f506 100644 --- a/smoke-test/tests/telemetry/telemetry_test.py +++ b/smoke-test/tests/telemetry/telemetry_test.py @@ -7,5 +7,7 @@ def test_no_clientID(): client_id_urn = "urn:li:telemetry:clientId" aspect = ["telemetryClientId"] - res_data = json.dumps(get_aspects_for_entity(entity_urn=client_id_urn, aspects=aspect, typed=False)) + res_data = json.dumps( + get_aspects_for_entity(entity_urn=client_id_urn, aspects=aspect, typed=False) + ) assert res_data == "{}" diff --git a/smoke-test/tests/test_result_msg.py b/smoke-test/tests/test_result_msg.py index e3b336db9d66c..b9775e8ee4acd 100644 --- a/smoke-test/tests/test_result_msg.py +++ b/smoke-test/tests/test_result_msg.py @@ -1,6 +1,6 @@ -from slack_sdk import WebClient import os +from slack_sdk import WebClient datahub_stats = {} @@ -10,10 +10,10 @@ def add_datahub_stats(stat_name, stat_val): def send_to_slack(passed: str): - slack_api_token = os.getenv('SLACK_API_TOKEN') - slack_channel = os.getenv('SLACK_CHANNEL') - slack_thread_ts = os.getenv('SLACK_THREAD_TS') - test_identifier = os.getenv('TEST_IDENTIFIER', 'LOCAL_TEST') + slack_api_token = os.getenv("SLACK_API_TOKEN") + slack_channel = os.getenv("SLACK_CHANNEL") + slack_thread_ts = os.getenv("SLACK_THREAD_TS") + test_identifier = os.getenv("TEST_IDENTIFIER", "LOCAL_TEST") if slack_api_token is None or slack_channel is None: return client = WebClient(token=slack_api_token) @@ -26,14 +26,21 @@ def send_to_slack(passed: str): message += f"Num {entity_type} is {val}\n" if slack_thread_ts is None: - client.chat_postMessage(channel=slack_channel, text=f'{test_identifier} Status - {passed}\n{message}') + client.chat_postMessage( + channel=slack_channel, + text=f"{test_identifier} Status - {passed}\n{message}", + ) else: - client.chat_postMessage(channel=slack_channel, text=f'{test_identifier} Status - {passed}\n{message}', thread_ts=slack_thread_ts) + client.chat_postMessage( + channel=slack_channel, + text=f"{test_identifier} Status - {passed}\n{message}", + thread_ts=slack_thread_ts, + ) def send_message(exitstatus): try: - send_to_slack('PASSED' if exitstatus == 0 else 'FAILED') + send_to_slack("PASSED" if exitstatus == 0 else "FAILED") except Exception as e: # We don't want to fail pytest at all print(f"Exception happened for sending msg to slack {e}") diff --git a/smoke-test/tests/test_stateful_ingestion.py b/smoke-test/tests/test_stateful_ingestion.py index a10cf13a08029..c6adb402e5d51 100644 --- a/smoke-test/tests/test_stateful_ingestion.py +++ b/smoke-test/tests/test_stateful_ingestion.py @@ -4,17 +4,15 @@ from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource from datahub.ingestion.source.state.checkpoint import Checkpoint -from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState -from datahub.ingestion.source.state.stale_entity_removal_handler import StaleEntityRemovalHandler +from datahub.ingestion.source.state.entity_removal_state import \ + GenericCheckpointState +from 
datahub.ingestion.source.state.stale_entity_removal_handler import \ + StaleEntityRemovalHandler from sqlalchemy import create_engine from sqlalchemy.sql import text -from tests.utils import ( - get_gms_url, - get_mysql_password, - get_mysql_url, - get_mysql_username, -) +from tests.utils import (get_gms_url, get_mysql_password, get_mysql_url, + get_mysql_username) def test_stateful_ingestion(wait_for_healthchecks): diff --git a/smoke-test/tests/tests/tests_test.py b/smoke-test/tests/tests/tests_test.py index 0b87f90a92c58..213a2ea087b7a 100644 --- a/smoke-test/tests/tests/tests_test.py +++ b/smoke-test/tests/tests/tests_test.py @@ -1,9 +1,13 @@ import pytest import tenacity -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest, wait_for_healthcheck_util, get_sleep_info + +from tests.utils import (delete_urns_from_file, get_frontend_url, + get_sleep_info, ingest_file_via_rest, + wait_for_healthcheck_util) sleep_sec, sleep_times = get_sleep_info() + @pytest.fixture(scope="module", autouse=True) def ingest_cleanup_data(request): print("ingesting test data") @@ -18,6 +22,7 @@ def wait_for_healthchecks(): wait_for_healthcheck_util() yield + @pytest.mark.dependency() def test_healthchecks(wait_for_healthchecks): # Call to wait_for_healthchecks fixture will do the actual functionality. diff --git a/smoke-test/tests/timeline/timeline_test.py b/smoke-test/tests/timeline/timeline_test.py index a73d585c6c72d..4705343c1a2ba 100644 --- a/smoke-test/tests/timeline/timeline_test.py +++ b/smoke-test/tests/timeline/timeline_test.py @@ -3,14 +3,14 @@ from datahub.cli import timeline_cli from datahub.cli.cli_utils import guess_entity_type, post_entity -from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync, get_datahub_graph + +from tests.utils import (get_datahub_graph, ingest_file_via_rest, + wait_for_writes_to_sync) def test_all(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -18,8 +18,13 @@ def test_all(): ingest_file_via_rest("tests/timeline/timeline_test_datav2.json") ingest_file_via_rest("tests/timeline/timeline_test_datav3.json") - res_data = timeline_cli.get_timeline(dataset_urn, ["TAG", "DOCUMENTATION", "TECHNICAL_SCHEMA", "GLOSSARY_TERM", - "OWNER"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, + ["TAG", "DOCUMENTATION", "TECHNICAL_SCHEMA", "GLOSSARY_TERM", "OWNER"], + None, + None, + False, + ) get_datahub_graph().hard_delete_entity(urn=dataset_urn) assert res_data @@ -35,9 +40,7 @@ def test_all(): def test_schema(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -45,7 +48,9 @@ def test_schema(): put(dataset_urn, "schemaMetadata", "test_resources/timeline/newschemav2.json") put(dataset_urn, "schemaMetadata", "test_resources/timeline/newschemav3.json") - res_data = timeline_cli.get_timeline(dataset_urn, ["TECHNICAL_SCHEMA"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, ["TECHNICAL_SCHEMA"], None, None, False + ) get_datahub_graph().hard_delete_entity(urn=dataset_urn) assert res_data @@ -61,9 +66,7 @@ def test_schema(): def test_glossary(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = 
"test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -71,7 +74,9 @@ def test_glossary(): put(dataset_urn, "glossaryTerms", "test_resources/timeline/newglossaryv2.json") put(dataset_urn, "glossaryTerms", "test_resources/timeline/newglossaryv3.json") - res_data = timeline_cli.get_timeline(dataset_urn, ["GLOSSARY_TERM"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, ["GLOSSARY_TERM"], None, None, False + ) get_datahub_graph().hard_delete_entity(urn=dataset_urn) assert res_data @@ -87,17 +92,29 @@ def test_glossary(): def test_documentation(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - put(dataset_urn, "institutionalMemory", "test_resources/timeline/newdocumentation.json") - put(dataset_urn, "institutionalMemory", "test_resources/timeline/newdocumentationv2.json") - put(dataset_urn, "institutionalMemory", "test_resources/timeline/newdocumentationv3.json") + put( + dataset_urn, + "institutionalMemory", + "test_resources/timeline/newdocumentation.json", + ) + put( + dataset_urn, + "institutionalMemory", + "test_resources/timeline/newdocumentationv2.json", + ) + put( + dataset_urn, + "institutionalMemory", + "test_resources/timeline/newdocumentationv3.json", + ) - res_data = timeline_cli.get_timeline(dataset_urn, ["DOCUMENTATION"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, ["DOCUMENTATION"], None, None, False + ) get_datahub_graph().hard_delete_entity(urn=dataset_urn) assert res_data @@ -113,9 +130,7 @@ def test_documentation(): def test_tags(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -139,9 +154,7 @@ def test_tags(): def test_ownership(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" diff --git a/smoke-test/tests/tokens/revokable_access_token_test.py b/smoke-test/tests/tokens/revokable_access_token_test.py index b10ad3aa3fc2a..55f3de594af4e 100644 --- a/smoke-test/tests/tokens/revokable_access_token_test.py +++ b/smoke-test/tests/tokens/revokable_access_token_test.py @@ -1,15 +1,11 @@ import os -import pytest -import requests from time import sleep -from tests.utils import ( - get_frontend_url, - wait_for_healthcheck_util, - get_admin_credentials, - wait_for_writes_to_sync, -) +import pytest +import requests +from tests.utils import (get_admin_credentials, get_frontend_url, + wait_for_healthcheck_util, wait_for_writes_to_sync) # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" diff --git a/smoke-test/tests/utils.py b/smoke-test/tests/utils.py index af03efd4f71f8..bd75b13d1910f 100644 --- a/smoke-test/tests/utils.py +++ b/smoke-test/tests/utils.py @@ -1,19 +1,20 @@ import functools import json +import logging import os -from datetime import datetime, timedelta, timezone import subprocess import time -from typing import Any, Dict, List, Tuple +from datetime import datetime, timedelta, timezone from time import sleep -from joblib import Parallel, delayed +from typing import Any, Dict, List, Tuple -import requests_wrapper as requests -import 
logging from datahub.cli import cli_utils from datahub.cli.cli_utils import get_system_auth -from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.ingestion.run.pipeline import Pipeline +from joblib import Parallel, delayed + +import requests_wrapper as requests from tests.consistency_utils import wait_for_writes_to_sync TIME: int = 1581407189000 @@ -174,6 +175,7 @@ def delete(entry): wait_for_writes_to_sync() + # Fixed now value NOW: datetime = datetime.now() @@ -232,6 +234,3 @@ def create_datahub_step_state_aspects( ] with open(onboarding_filename, "w") as f: json.dump(aspects_dict, f, indent=2) - - - diff --git a/smoke-test/tests/views/views_test.py b/smoke-test/tests/views/views_test.py index 4da69750a167b..685c3bd80b04d 100644 --- a/smoke-test/tests/views/views_test.py +++ b/smoke-test/tests/views/views_test.py @@ -1,16 +1,14 @@ -import pytest import time + +import pytest import tenacity -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - get_gms_url, - ingest_file_via_rest, - get_sleep_info, -) + +from tests.utils import (delete_urns_from_file, get_frontend_url, get_gms_url, + get_sleep_info, ingest_file_via_rest) sleep_sec, sleep_times = get_sleep_info() + @pytest.mark.dependency() def test_healthchecks(wait_for_healthchecks): # Call to wait_for_healthchecks fixture will do the actual functionality. @@ -40,6 +38,7 @@ def _ensure_more_views(frontend_session, list_views_json, query_name, before_cou assert after_count == before_count + 1 return after_count + @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) @@ -111,18 +110,18 @@ def test_create_list_delete_global_view(frontend_session): new_view_name = "Test View" new_view_description = "Test Description" new_view_definition = { - "entityTypes": ["DATASET", "DASHBOARD"], - "filter": { - "operator": "AND", - "filters": [ - { - "field": "tags", - "values": ["urn:li:tag:test"], - "negated": False, - "condition": "EQUAL" - } - ] - } + "entityTypes": ["DATASET", "DASHBOARD"], + "filter": { + "operator": "AND", + "filters": [ + { + "field": "tags", + "values": ["urn:li:tag:test"], + "negated": False, + "condition": "EQUAL", + } + ], + }, } # Create new View @@ -137,7 +136,7 @@ def test_create_list_delete_global_view(frontend_session): "viewType": "GLOBAL", "name": new_view_name, "description": new_view_description, - "definition": new_view_definition + "definition": new_view_definition, } }, } @@ -169,9 +168,7 @@ def test_create_list_delete_global_view(frontend_session): "query": """mutation deleteView($urn: String!) 
{\n deleteView(urn: $urn) }""", - "variables": { - "urn": view_urn - }, + "variables": {"urn": view_urn}, } response = frontend_session.post( @@ -189,7 +186,9 @@ def test_create_list_delete_global_view(frontend_session): ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_create_list_delete_global_view"]) +@pytest.mark.dependency( + depends=["test_healthchecks", "test_create_list_delete_global_view"] +) def test_create_list_delete_personal_view(frontend_session): # Get count of existing views @@ -237,18 +236,18 @@ def test_create_list_delete_personal_view(frontend_session): new_view_name = "Test View" new_view_description = "Test Description" new_view_definition = { - "entityTypes": ["DATASET", "DASHBOARD"], - "filter": { - "operator": "AND", - "filters": [ - { - "field": "tags", - "values": ["urn:li:tag:test"], - "negated": False, - "condition": "EQUAL" - } - ] - } + "entityTypes": ["DATASET", "DASHBOARD"], + "filter": { + "operator": "AND", + "filters": [ + { + "field": "tags", + "values": ["urn:li:tag:test"], + "negated": False, + "condition": "EQUAL", + } + ], + }, } # Create new View @@ -263,7 +262,7 @@ def test_create_list_delete_personal_view(frontend_session): "viewType": "PERSONAL", "name": new_view_name, "description": new_view_description, - "definition": new_view_definition + "definition": new_view_definition, } }, } @@ -293,9 +292,7 @@ def test_create_list_delete_personal_view(frontend_session): "query": """mutation deleteView($urn: String!) {\n deleteView(urn: $urn) }""", - "variables": { - "urn": view_urn - }, + "variables": {"urn": view_urn}, } response = frontend_session.post( @@ -312,25 +309,28 @@ def test_create_list_delete_personal_view(frontend_session): before_count=new_count, ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_create_list_delete_personal_view"]) + +@pytest.mark.dependency( + depends=["test_healthchecks", "test_create_list_delete_personal_view"] +) def test_update_global_view(frontend_session): # First create a view new_view_name = "Test View" new_view_description = "Test Description" new_view_definition = { - "entityTypes": ["DATASET", "DASHBOARD"], - "filter": { - "operator": "AND", - "filters": [ - { - "field": "tags", - "values": ["urn:li:tag:test"], - "negated": False, - "condition": "EQUAL" - } - ] - } + "entityTypes": ["DATASET", "DASHBOARD"], + "filter": { + "operator": "AND", + "filters": [ + { + "field": "tags", + "values": ["urn:li:tag:test"], + "negated": False, + "condition": "EQUAL", + } + ], + }, } # Create new View @@ -345,7 +345,7 @@ def test_update_global_view(frontend_session): "viewType": "PERSONAL", "name": new_view_name, "description": new_view_description, - "definition": new_view_definition + "definition": new_view_definition, } }, } @@ -366,18 +366,18 @@ def test_update_global_view(frontend_session): new_view_name = "New Test View" new_view_description = "New Test Description" new_view_definition = { - "entityTypes": ["DATASET", "DASHBOARD", "CHART", "DATA_FLOW"], - "filter": { - "operator": "OR", - "filters": [ - { - "field": "glossaryTerms", - "values": ["urn:li:glossaryTerm:test"], - "negated": True, - "condition": "CONTAIN" - } - ] - } + "entityTypes": ["DATASET", "DASHBOARD", "CHART", "DATA_FLOW"], + "filter": { + "operator": "OR", + "filters": [ + { + "field": "glossaryTerms", + "values": ["urn:li:glossaryTerm:test"], + "negated": True, + "condition": "CONTAIN", + } + ], + }, } update_view_json = { @@ -391,8 +391,8 @@ def test_update_global_view(frontend_session): "input": { "name": new_view_name, 
"description": new_view_description, - "definition": new_view_definition - } + "definition": new_view_definition, + }, }, } @@ -411,9 +411,7 @@ def test_update_global_view(frontend_session): "query": """mutation deleteView($urn: String!) {\n deleteView(urn: $urn) }""", - "variables": { - "urn": view_urn - }, + "variables": {"urn": view_urn}, } response = frontend_session.post( From 6ecdeda5ff590456c6bfadfa5c37821f7281169e Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 10 Oct 2023 16:28:40 +0530 Subject: [PATCH 25/98] fix(setup): drop older table if exists (#8979) --- docker/mariadb/init.sql | 2 ++ docker/mysql-setup/init.sql | 2 ++ docker/mysql/init.sql | 2 ++ docker/postgres-setup/init.sql | 2 ++ docker/postgres/init.sql | 2 ++ 5 files changed, 10 insertions(+) diff --git a/docker/mariadb/init.sql b/docker/mariadb/init.sql index c4132575cf442..95c8cabbc5ca4 100644 --- a/docker/mariadb/init.sql +++ b/docker/mariadb/init.sql @@ -28,3 +28,5 @@ insert into metadata_aspect_v2 (urn, aspect, version, metadata, createdon, creat now(), 'urn:li:corpuser:__datahub_system' ); + +DROP TABLE IF EXISTS metadata_index; diff --git a/docker/mysql-setup/init.sql b/docker/mysql-setup/init.sql index 2370a971941d2..b789329ddfd17 100644 --- a/docker/mysql-setup/init.sql +++ b/docker/mysql-setup/init.sql @@ -39,3 +39,5 @@ INSERT INTO metadata_aspect_v2 SELECT * FROM temp_metadata_aspect_v2 WHERE NOT EXISTS (SELECT * from metadata_aspect_v2); DROP TABLE temp_metadata_aspect_v2; + +DROP TABLE IF EXISTS metadata_index; diff --git a/docker/mysql/init.sql b/docker/mysql/init.sql index b4b4e4617806c..aca57d7cd444c 100644 --- a/docker/mysql/init.sql +++ b/docker/mysql/init.sql @@ -27,3 +27,5 @@ INSERT INTO metadata_aspect_v2 (urn, aspect, version, metadata, createdon, creat now(), 'urn:li:corpuser:__datahub_system' ); + +DROP TABLE IF EXISTS metadata_index; diff --git a/docker/postgres-setup/init.sql b/docker/postgres-setup/init.sql index 12fff7aec7fe6..72b2f73192e00 100644 --- a/docker/postgres-setup/init.sql +++ b/docker/postgres-setup/init.sql @@ -35,3 +35,5 @@ INSERT INTO metadata_aspect_v2 SELECT * FROM temp_metadata_aspect_v2 WHERE NOT EXISTS (SELECT * from metadata_aspect_v2); DROP TABLE temp_metadata_aspect_v2; + +DROP TABLE IF EXISTS metadata_index; diff --git a/docker/postgres/init.sql b/docker/postgres/init.sql index cf477c135422e..87c8dd3337fac 100644 --- a/docker/postgres/init.sql +++ b/docker/postgres/init.sql @@ -28,3 +28,5 @@ insert into metadata_aspect_v2 (urn, aspect, version, metadata, createdon, creat now(), 'urn:li:corpuser:__datahub_system' ); + +DROP TABLE IF EXISTS metadata_index; From 1a72fa499c3404c6c3d2961e9575495f2dd021d2 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 10 Oct 2023 17:34:06 -0400 Subject: [PATCH 26/98] feat(ingest/tableau): Allow parsing of database name from fullName (#8981) --- .../src/datahub/ingestion/source/tableau.py | 74 ++------ .../ingestion/source/tableau_common.py | 162 +++++++++++++----- .../tableau/test_tableau_ingest.py | 34 ++-- 3 files changed, 151 insertions(+), 119 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index e347cd26d245a..bad7ae49d325e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -77,6 +77,7 @@ FIELD_TYPE_MAPPING, MetadataQueryException, TableauLineageOverrides, + TableauUpstreamReference, clean_query, custom_sql_graphql_query, 
dashboard_graphql_query, @@ -85,7 +86,6 @@ get_overridden_info, get_unique_custom_sql, make_fine_grained_lineage_class, - make_table_urn, make_upstream_class, published_datasource_graphql_query, query_metadata, @@ -271,7 +271,7 @@ class TableauConfig( "You can change this if your Tableau projects contain slashes in their names, and you'd like to filter by project.", ) - default_schema_map: dict = Field( + default_schema_map: Dict[str, str] = Field( default={}, description="Default schema to use when schema is not found." ) ingest_tags: Optional[bool] = Field( @@ -997,41 +997,16 @@ def get_upstream_tables( ) continue - schema = table.get(tableau_constant.SCHEMA) or "" - table_name = table.get(tableau_constant.NAME) or "" - full_name = table.get(tableau_constant.FULL_NAME) or "" - upstream_db = ( - table[tableau_constant.DATABASE][tableau_constant.NAME] - if table.get(tableau_constant.DATABASE) - and table[tableau_constant.DATABASE].get(tableau_constant.NAME) - else "" - ) - logger.debug( - "Processing Table with Connection Type: {0} and id {1}".format( - table.get(tableau_constant.CONNECTION_TYPE) or "", - table.get(tableau_constant.ID) or "", + try: + ref = TableauUpstreamReference.create( + table, default_schema_map=self.config.default_schema_map ) - ) - schema = self._get_schema(schema, upstream_db, full_name) - # if the schema is included within the table name we omit it - if ( - schema - and table_name - and full_name - and table_name == full_name - and schema in table_name - ): - logger.debug( - f"Omitting schema for upstream table {table[tableau_constant.ID]}, schema included in table name" - ) - schema = "" + except Exception as e: + logger.info(f"Failed to generate upstream reference for {table}: {e}") + continue - table_urn = make_table_urn( + table_urn = ref.make_dataset_urn( self.config.env, - upstream_db, - table.get(tableau_constant.CONNECTION_TYPE) or "", - schema, - table_name, self.config.platform_instance_map, self.config.lineage_overrides, ) @@ -1052,7 +1027,7 @@ def get_upstream_tables( urn=table_urn, id=table[tableau_constant.ID], num_cols=num_tbl_cols, - paths=set([table_path]) if table_path else set(), + paths={table_path} if table_path else set(), ) else: self.database_tables[table_urn].update_table( @@ -2462,35 +2437,6 @@ def emit_embedded_datasources(self) -> Iterable[MetadataWorkUnit]: is_embedded_ds=True, ) - @lru_cache(maxsize=None) - def _get_schema(self, schema_provided: str, database: str, fullName: str) -> str: - # For some databases, the schema attribute in tableau api does not return - # correct schema name for the table. For more information, see - # https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_model.html#schema_attribute. 
- # Hence we extract schema from fullName whenever fullName is available - schema = self._extract_schema_from_fullName(fullName) if fullName else "" - if not schema: - schema = schema_provided - elif schema != schema_provided: - logger.debug( - "Correcting schema, provided {0}, corrected {1}".format( - schema_provided, schema - ) - ) - - if not schema and database in self.config.default_schema_map: - schema = self.config.default_schema_map[database] - - return schema - - @lru_cache(maxsize=None) - def _extract_schema_from_fullName(self, fullName: str) -> str: - # fullName is observed to be in format [schemaName].[tableName] - # OR simply tableName OR [tableName] - if fullName.startswith("[") and "].[" in fullName: - return fullName[1 : fullName.index("]")] - return "" - @lru_cache(maxsize=None) def get_last_modified( self, creator: Optional[str], created_at: bytes, updated_at: bytes diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index 2c92285fdba77..7c4852042ce7c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -1,4 +1,6 @@ import html +import logging +from dataclasses import dataclass from functools import lru_cache from typing import Dict, List, Optional, Tuple @@ -6,6 +8,7 @@ import datahub.emitter.mce_builder as builder from datahub.configuration.common import ConfigModel +from datahub.ingestion.source import tableau_constant as tc from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageType, FineGrainedLineage, @@ -31,6 +34,8 @@ ) from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult +logger = logging.getLogger(__name__) + class TableauLineageOverrides(ConfigModel): platform_override_map: Optional[Dict[str, str]] = Field( @@ -537,12 +542,12 @@ def get_fully_qualified_table_name( platform: str, upstream_db: str, schema: str, - full_name: str, + table_name: str, ) -> str: if platform == "athena": upstream_db = "" database_name = f"{upstream_db}." if upstream_db else "" - final_name = full_name.replace("[", "").replace("]", "") + final_name = table_name.replace("[", "").replace("]", "") schema_name = f"{schema}." 
if schema else "" @@ -573,17 +578,123 @@ def get_fully_qualified_table_name( return fully_qualified_table_name -def get_platform_instance( - platform: str, platform_instance_map: Optional[Dict[str, str]] -) -> Optional[str]: - if platform_instance_map is not None and platform in platform_instance_map.keys(): - return platform_instance_map[platform] +@dataclass +class TableauUpstreamReference: + database: Optional[str] + schema: Optional[str] + table: str + + connection_type: str + + @classmethod + def create( + cls, d: dict, default_schema_map: Optional[Dict[str, str]] = None + ) -> "TableauUpstreamReference": + # Values directly from `table` object from Tableau + database = t_database = d.get(tc.DATABASE, {}).get(tc.NAME) + schema = t_schema = d.get(tc.SCHEMA) + table = t_table = d.get(tc.NAME) or "" + t_full_name = d.get(tc.FULL_NAME) + t_connection_type = d[tc.CONNECTION_TYPE] # required to generate urn + t_id = d[tc.ID] + + parsed_full_name = cls.parse_full_name(t_full_name) + if parsed_full_name and len(parsed_full_name) == 3: + database, schema, table = parsed_full_name + elif parsed_full_name and len(parsed_full_name) == 2: + schema, table = parsed_full_name + else: + logger.debug( + f"Upstream urn generation ({t_id}):" + f" Did not parse full name {t_full_name}: unexpected number of values", + ) + + if not schema and default_schema_map and database in default_schema_map: + schema = default_schema_map[database] + + if database != t_database: + logger.debug( + f"Upstream urn generation ({t_id}):" + f" replacing database {t_database} with {database} from full name {t_full_name}" + ) + if schema != t_schema: + logger.debug( + f"Upstream urn generation ({t_id}):" + f" replacing schema {t_schema} with {schema} from full name {t_full_name}" + ) + if table != t_table: + logger.debug( + f"Upstream urn generation ({t_id}):" + f" replacing table {t_table} with {table} from full name {t_full_name}" + ) + + # TODO: See if we can remove this -- made for redshift + if ( + schema + and t_table + and t_full_name + and t_table == t_full_name + and schema in t_table + ): + logger.debug( + f"Omitting schema for upstream table {t_id}, schema included in table name" + ) + schema = "" + + return cls( + database=database, + schema=schema, + table=table, + connection_type=t_connection_type, + ) + + @staticmethod + def parse_full_name(full_name: Optional[str]) -> Optional[List[str]]: + # fullName is observed to be in formats: + # [database].[schema].[table] + # [schema].[table] + # [table] + # table + # schema + + # TODO: Validate the startswith check. 
Currently required for our integration tests + if full_name is None or not full_name.startswith("["): + return None + + return full_name.replace("[", "").replace("]", "").split(".") + + def make_dataset_urn( + self, + env: str, + platform_instance_map: Optional[Dict[str, str]], + lineage_overrides: Optional[TableauLineageOverrides] = None, + ) -> str: + ( + upstream_db, + platform_instance, + platform, + original_platform, + ) = get_overridden_info( + connection_type=self.connection_type, + upstream_db=self.database, + lineage_overrides=lineage_overrides, + platform_instance_map=platform_instance_map, + ) + + table_name = get_fully_qualified_table_name( + original_platform, + upstream_db or "", + self.schema, + self.table, + ) - return None + return builder.make_dataset_urn_with_platform_instance( + platform, table_name, platform_instance, env + ) def get_overridden_info( - connection_type: str, + connection_type: Optional[str], upstream_db: Optional[str], platform_instance_map: Optional[Dict[str, str]], lineage_overrides: Optional[TableauLineageOverrides] = None, @@ -605,7 +716,9 @@ def get_overridden_info( ): upstream_db = lineage_overrides.database_override_map[upstream_db] - platform_instance = get_platform_instance(original_platform, platform_instance_map) + platform_instance = ( + platform_instance_map.get(original_platform) if platform_instance_map else None + ) if original_platform in ("athena", "hive", "mysql"): # Two tier databases upstream_db = None @@ -613,35 +726,6 @@ def get_overridden_info( return upstream_db, platform_instance, platform, original_platform -def make_table_urn( - env: str, - upstream_db: Optional[str], - connection_type: str, - schema: str, - full_name: str, - platform_instance_map: Optional[Dict[str, str]], - lineage_overrides: Optional[TableauLineageOverrides] = None, -) -> str: - - upstream_db, platform_instance, platform, original_platform = get_overridden_info( - connection_type=connection_type, - upstream_db=upstream_db, - lineage_overrides=lineage_overrides, - platform_instance_map=platform_instance_map, - ) - - table_name = get_fully_qualified_table_name( - original_platform, - upstream_db if upstream_db is not None else "", - schema, - full_name, - ) - - return builder.make_dataset_urn_with_platform_instance( - platform, table_name, platform_instance, env - ) - - def make_description_from_params(description, formula): """ Generate column description diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index c31867f5aa904..0510f4a40f659 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -20,7 +20,7 @@ from datahub.ingestion.source.tableau import TableauConfig, TableauSource from datahub.ingestion.source.tableau_common import ( TableauLineageOverrides, - make_table_urn, + TableauUpstreamReference, ) from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageType, @@ -546,13 +546,13 @@ def test_lineage_overrides(): enable_logging() # Simple - specify platform instance to presto table assert ( - make_table_urn( - DEFAULT_ENV, + TableauUpstreamReference( "presto_catalog", - "presto", "test-schema", - "presto_catalog.test-schema.test-table", - platform_instance_map={"presto": "my_presto_instance"}, + "test-table", + "presto", + ).make_dataset_urn( + env=DEFAULT_ENV, platform_instance_map={"presto": "my_presto_instance"} ) == 
"urn:li:dataset:(urn:li:dataPlatform:presto,my_presto_instance.presto_catalog.test-schema.test-table,PROD)" ) @@ -560,12 +560,13 @@ def test_lineage_overrides(): # Transform presto urn to hive urn # resulting platform instance for hive = mapped platform instance + presto_catalog assert ( - make_table_urn( - DEFAULT_ENV, + TableauUpstreamReference( "presto_catalog", - "presto", "test-schema", - "presto_catalog.test-schema.test-table", + "test-table", + "presto", + ).make_dataset_urn( + env=DEFAULT_ENV, platform_instance_map={"presto": "my_instance"}, lineage_overrides=TableauLineageOverrides( platform_override_map={"presto": "hive"}, @@ -574,14 +575,15 @@ def test_lineage_overrides(): == "urn:li:dataset:(urn:li:dataPlatform:hive,my_instance.presto_catalog.test-schema.test-table,PROD)" ) - # tranform hive urn to presto urn + # transform hive urn to presto urn assert ( - make_table_urn( - DEFAULT_ENV, - "", - "hive", + TableauUpstreamReference( + None, "test-schema", - "test-schema.test-table", + "test-table", + "hive", + ).make_dataset_urn( + env=DEFAULT_ENV, platform_instance_map={"hive": "my_presto_instance.presto_catalog"}, lineage_overrides=TableauLineageOverrides( platform_override_map={"hive": "presto"}, From e2988017c23270acd95e25ec3289983ecc3895f7 Mon Sep 17 00:00:00 2001 From: Amanda Hernando <110099762+amanda-her@users.noreply.github.com> Date: Wed, 11 Oct 2023 01:36:01 +0200 Subject: [PATCH 27/98] feat(auth): add data platform instance field resolver provider (#8828) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sergio Gómez Villamor Co-authored-by: Adrián Pertíñez --- .../authorization/ResolvedResourceSpec.java | 17 ++ .../authorization/ResourceFieldType.java | 6 +- .../DefaultResourceSpecResolver.java | 9 +- ...PlatformInstanceFieldResolverProvider.java | 70 +++++++ ...formInstanceFieldResolverProviderTest.java | 188 ++++++++++++++++++ 5 files changed, 286 insertions(+), 4 deletions(-) create mode 100644 metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java create mode 100644 metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java index 53dd0be44f963..8e429a8ca1b94 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java @@ -3,6 +3,7 @@ import java.util.Collections; import java.util.Map; import java.util.Set; +import javax.annotation.Nullable; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; @@ -35,4 +36,20 @@ public Set getOwners() { } return fieldResolvers.get(ResourceFieldType.OWNER).getFieldValuesFuture().join().getValues(); } + + /** + * Fetch the platform instance for a Resolved Resource Spec + * @return a Platform Instance or null if one does not exist. 
+ */ + @Nullable + public String getDataPlatformInstance() { + if (!fieldResolvers.containsKey(ResourceFieldType.DATA_PLATFORM_INSTANCE)) { + return null; + } + Set dataPlatformInstance = fieldResolvers.get(ResourceFieldType.DATA_PLATFORM_INSTANCE).getFieldValuesFuture().join().getValues(); + if (dataPlatformInstance.size() > 0) { + return dataPlatformInstance.stream().findFirst().get(); + } + return null; + } } diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java index ee54d2bfbba1d..478522dc7c331 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java @@ -19,5 +19,9 @@ public enum ResourceFieldType { /** * Domains of resource */ - DOMAIN + DOMAIN, + /** + * Data platform instance of resource + */ + DATA_PLATFORM_INSTANCE } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java index cd4e0b0967829..64c43dc8aa591 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java @@ -1,13 +1,15 @@ package com.datahub.authorization; -import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; import com.datahub.authentication.Authentication; +import com.datahub.authorization.fieldresolverprovider.DataPlatformInstanceFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.DomainFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.ResourceFieldResolverProvider; import com.google.common.collect.ImmutableList; import com.linkedin.entity.client.EntityClient; + import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -20,7 +22,8 @@ public DefaultResourceSpecResolver(Authentication systemAuthentication, EntityCl _resourceFieldResolverProviders = ImmutableList.of(new EntityTypeFieldResolverProvider(), new EntityUrnFieldResolverProvider(), new DomainFieldResolverProvider(entityClient, systemAuthentication), - new OwnerFieldResolverProvider(entityClient, systemAuthentication)); + new OwnerFieldResolverProvider(entityClient, systemAuthentication), + new DataPlatformInstanceFieldResolverProvider(entityClient, systemAuthentication)); } @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java new file mode 100644 index 0000000000000..cd838625c2ca1 --- /dev/null +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java @@ -0,0 +1,70 @@ +package 
com.datahub.authorization.fieldresolverprovider; + +import com.datahub.authentication.Authentication; +import com.datahub.authorization.FieldResolver; +import com.datahub.authorization.ResourceFieldType; +import com.datahub.authorization.ResourceSpec; +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.client.EntityClient; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import java.util.Collections; +import java.util.Objects; + +import static com.linkedin.metadata.Constants.*; + +/** + * Provides field resolver for domain given resourceSpec + */ +@Slf4j +@RequiredArgsConstructor +public class DataPlatformInstanceFieldResolverProvider implements ResourceFieldResolverProvider { + + private final EntityClient _entityClient; + private final Authentication _systemAuthentication; + + @Override + public ResourceFieldType getFieldType() { + return ResourceFieldType.DATA_PLATFORM_INSTANCE; + } + + @Override + public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { + return FieldResolver.getResolverFromFunction(resourceSpec, this::getDataPlatformInstance); + } + + private FieldResolver.FieldValue getDataPlatformInstance(ResourceSpec resourceSpec) { + Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource()); + // In the case that the entity is a platform instance, the associated platform instance entity is the instance itself + if (entityUrn.getEntityType().equals(DATA_PLATFORM_INSTANCE_ENTITY_NAME)) { + return FieldResolver.FieldValue.builder() + .values(Collections.singleton(entityUrn.toString())) + .build(); + } + + EnvelopedAspect dataPlatformInstanceAspect; + try { + EntityResponse response = _entityClient.getV2(entityUrn.getEntityType(), entityUrn, + Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME), _systemAuthentication); + if (response == null || !response.getAspects().containsKey(DATA_PLATFORM_INSTANCE_ASPECT_NAME)) { + return FieldResolver.emptyFieldValue(); + } + dataPlatformInstanceAspect = response.getAspects().get(DATA_PLATFORM_INSTANCE_ASPECT_NAME); + } catch (Exception e) { + log.error("Error while retrieving platform instance aspect for urn {}", entityUrn, e); + return FieldResolver.emptyFieldValue(); + } + DataPlatformInstance dataPlatformInstance = new DataPlatformInstance(dataPlatformInstanceAspect.getValue().data()); + if (dataPlatformInstance.getInstance() == null) { + return FieldResolver.emptyFieldValue(); + } + return FieldResolver.FieldValue.builder() + .values(Collections.singleton(Objects.requireNonNull(dataPlatformInstance.getInstance()).toString())) + .build(); + } +} \ No newline at end of file diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java new file mode 100644 index 0000000000000..e525c602c2620 --- /dev/null +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java @@ -0,0 +1,188 @@ +package com.datahub.authorization.fieldresolverprovider; + +import com.datahub.authentication.Authentication; +import com.datahub.authorization.ResourceFieldType; +import com.datahub.authorization.ResourceSpec; 
+import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.r2.RemoteInvocationException; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.net.URISyntaxException; +import java.util.Collections; +import java.util.Set; + +import static com.linkedin.metadata.Constants.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +public class DataPlatformInstanceFieldResolverProviderTest { + + private static final String DATA_PLATFORM_INSTANCE_URN = + "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)"; + private static final String RESOURCE_URN = + "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.testDataset,PROD)"; + private static final ResourceSpec RESOURCE_SPEC = new ResourceSpec(DATASET_ENTITY_NAME, RESOURCE_URN); + + @Mock + private EntityClient entityClientMock; + @Mock + private Authentication systemAuthenticationMock; + + private DataPlatformInstanceFieldResolverProvider dataPlatformInstanceFieldResolverProvider; + + @BeforeMethod + public void setup() { + MockitoAnnotations.initMocks(this); + dataPlatformInstanceFieldResolverProvider = + new DataPlatformInstanceFieldResolverProvider(entityClientMock, systemAuthenticationMock); + } + + @Test + public void shouldReturnDataPlatformInstanceType() { + assertEquals(ResourceFieldType.DATA_PLATFORM_INSTANCE, dataPlatformInstanceFieldResolverProvider.getFieldType()); + } + + @Test + public void shouldReturnFieldValueWithResourceSpecIfTypeIsDataPlatformInstance() { + var resourceSpec = new ResourceSpec(DATA_PLATFORM_INSTANCE_ENTITY_NAME, DATA_PLATFORM_INSTANCE_URN); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(resourceSpec); + + assertEquals(Set.of(DATA_PLATFORM_INSTANCE_URN), result.getFieldValuesFuture().join().getValues()); + verifyZeroInteractions(entityClientMock); + } + + @Test + public void shouldReturnEmptyFieldValueWhenResponseIsNull() throws RemoteInvocationException, URISyntaxException { + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(null); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenResourceHasNoDataPlatformInstance() + throws RemoteInvocationException, URISyntaxException { + var entityResponseMock = mock(EntityResponse.class); + when(entityResponseMock.getAspects()).thenReturn(new EnvelopedAspectMap()); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + 
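+    // The mocked response carries no dataPlatformInstance aspect, so the provider should resolve to an empty field value.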
var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenThereIsAnException() throws RemoteInvocationException, URISyntaxException { + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenThrow(new RemoteInvocationException()); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenDataPlatformInstanceHasNoInstance() + throws RemoteInvocationException, URISyntaxException { + + var dataPlatform = new DataPlatformInstance() + .setPlatform(Urn.createFromString("urn:li:dataPlatform:s3")); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(DATA_PLATFORM_INSTANCE_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(dataPlatform.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnFieldValueWithDataPlatformInstanceOfTheResource() + throws RemoteInvocationException, URISyntaxException { + + var dataPlatformInstance = new DataPlatformInstance() + .setPlatform(Urn.createFromString("urn:li:dataPlatform:s3")) + .setInstance(Urn.createFromString(DATA_PLATFORM_INSTANCE_URN)); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(DATA_PLATFORM_INSTANCE_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(dataPlatformInstance.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertEquals(Set.of(DATA_PLATFORM_INSTANCE_URN), result.getFieldValuesFuture().join().getValues()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } +} From a17db676e37d90ec47f16a43ab95e0d562952939 Mon Sep 17 00:00:00 2001 From: siladitya 
<68184387+siladitya2@users.noreply.github.com> Date: Wed, 11 Oct 2023 02:43:36 +0200 Subject: [PATCH 28/98] feat(graphql): Added datafetcher for DataPlatformInstance entity (#8935) Co-authored-by: si-chakraborty Co-authored-by: John Joyce --- .../datahub/graphql/GmsGraphQLEngine.java | 1 + .../DataPlatformInstanceType.java | 34 ++++++++++++++++++- .../src/main/resources/entity.graphql | 5 +++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 3ba0cc1f747e3..ebb5c7d62c7d3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -821,6 +821,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("glossaryNode", getResolver(glossaryNodeType)) .dataFetcher("domain", getResolver((domainType))) .dataFetcher("dataPlatform", getResolver(dataPlatformType)) + .dataFetcher("dataPlatformInstance", getResolver(dataPlatformInstanceType)) .dataFetcher("mlFeatureTable", getResolver(mlFeatureTableType)) .dataFetcher("mlFeature", getResolver(mlFeatureType)) .dataFetcher("mlPrimaryKey", getResolver(mlPrimaryKeyType)) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatforminstance/DataPlatformInstanceType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatforminstance/DataPlatformInstanceType.java index 2423fc31ea52e..87614e1332528 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatforminstance/DataPlatformInstanceType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatforminstance/DataPlatformInstanceType.java @@ -4,16 +4,25 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.AutoCompleteResults; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.FacetFilterInput; +import com.linkedin.datahub.graphql.generated.SearchResults; import com.linkedin.datahub.graphql.types.dataplatforminstance.mappers.DataPlatformInstanceMapper; +import com.linkedin.datahub.graphql.types.mappers.AutoCompleteResultsMapper; +import com.linkedin.datahub.graphql.types.SearchableEntityType; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.query.AutoCompleteResult; +import com.linkedin.metadata.query.filter.Filter; import graphql.execution.DataFetcherResult; +import org.apache.commons.lang3.NotImplementedException; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -22,7 +31,10 @@ import java.util.function.Function; import java.util.stream.Collectors; -public class DataPlatformInstanceType implements com.linkedin.datahub.graphql.types.EntityType { +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; + +public class DataPlatformInstanceType implements SearchableEntityType, + 
com.linkedin.datahub.graphql.types.EntityType { static final Set ASPECTS_TO_FETCH = ImmutableSet.of( Constants.DATA_PLATFORM_INSTANCE_KEY_ASPECT_NAME, @@ -84,4 +96,24 @@ public List> batchLoad(@Nonnull List filters, + int start, + int count, + @Nonnull final QueryContext context) throws Exception { + throw new NotImplementedException("Searchable type (deprecated) not implemented on DataPlatformInstance entity type"); + } + + @Override + public AutoCompleteResults autoComplete(@Nonnull String query, + @Nullable String field, + @Nullable Filter filters, + int limit, + @Nonnull final QueryContext context) throws Exception { + final AutoCompleteResult result = _entityClient.autoComplete(DATA_PLATFORM_INSTANCE_ENTITY_NAME, query, + filters, limit, context.getAuthentication()); + return AutoCompleteResultsMapper.map(result); + } + } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 39f86948c77c4..0b15d7b875a9c 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -226,6 +226,11 @@ type Query { listOwnershipTypes( "Input required for listing custom ownership types" input: ListOwnershipTypesInput!): ListOwnershipTypesResult! + + """ + Fetch a Data Platform Instance by primary key (urn) + """ + dataPlatformInstance(urn: String!): DataPlatformInstance } """ From dfcea2441e75e1eef517c0f9a4765e6e7990f297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Wed, 11 Oct 2023 03:04:44 +0200 Subject: [PATCH 29/98] feat(config): configurable bootstrap policies file (#8812) Co-authored-by: John Joyce --- .../configuration/src/main/resources/application.yml | 4 ++++ .../boot/factories/BootstrapManagerFactory.java | 7 ++++++- .../metadata/boot/steps/IngestPoliciesStep.java | 10 +++++++--- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index 4dfd96ac75c6c..d22f92adca8f9 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -276,6 +276,10 @@ bootstrap: enabled: ${UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED:false} # enable to run the upgrade to migrate legacy default browse paths to new ones backfillBrowsePathsV2: enabled: ${BACKFILL_BROWSE_PATHS_V2:false} # Enables running the backfill of browsePathsV2 upgrade step. There are concerns about the load of this step so hiding it behind a flag. 
Deprecating in favor of running through SystemUpdate + policies: + file: ${BOOTSTRAP_POLICIES_FILE:classpath:boot/policies.json} + # eg for local file + # file: "file:///datahub/datahub-gms/resources/custom-policies.json" servlets: waitTimeout: ${BOOTSTRAP_SERVLETS_WAITTIMEOUT:60} # Total waiting time in seconds for servlets to initialize diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java index c490f00021201..3a761bd12647e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java @@ -31,6 +31,7 @@ import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; + import java.util.ArrayList; import java.util.List; import javax.annotation.Nonnull; @@ -41,6 +42,7 @@ import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; +import org.springframework.core.io.Resource; @Configuration @@ -89,13 +91,16 @@ public class BootstrapManagerFactory { @Value("${bootstrap.backfillBrowsePathsV2.enabled}") private Boolean _backfillBrowsePathsV2Enabled; + @Value("${bootstrap.policies.file}") + private Resource _policiesResource; + @Bean(name = "bootstrapManager") @Scope("singleton") @Nonnull protected BootstrapManager createInstance() { final IngestRootUserStep ingestRootUserStep = new IngestRootUserStep(_entityService); final IngestPoliciesStep ingestPoliciesStep = - new IngestPoliciesStep(_entityRegistry, _entityService, _entitySearchService, _searchDocumentTransformer); + new IngestPoliciesStep(_entityRegistry, _entityService, _entitySearchService, _searchDocumentTransformer, _policiesResource); final IngestRolesStep ingestRolesStep = new IngestRolesStep(_entityService, _entityRegistry); final IngestDataPlatformsStep ingestDataPlatformsStep = new IngestDataPlatformsStep(_entityService); final IngestDataPlatformInstancesStep ingestDataPlatformInstancesStep = diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java index 87dcfd736da40..cf29645214466 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java @@ -25,6 +25,7 @@ import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.policy.DataHubPolicyInfo; + import java.io.IOException; import java.net.URISyntaxException; import java.util.Collections; @@ -35,7 +36,8 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.core.io.ClassPathResource; +import org.springframework.core.io.Resource; + import static com.linkedin.metadata.Constants.*; @@ -52,6 +54,8 @@ public class IngestPoliciesStep implements BootstrapStep { private final EntitySearchService _entitySearchService; private final SearchDocumentTransformer _searchDocumentTransformer; + private final Resource _policiesResource; + 
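+  // _policiesResource is wired in from the bootstrap.policies.file property via BootstrapManagerFactory (see above).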
@Override public String name() { return "IngestPoliciesStep"; @@ -66,10 +70,10 @@ public void execute() throws IOException, URISyntaxException { .maxStringLength(maxSize).build()); // 0. Execute preflight check to see whether we need to ingest policies - log.info("Ingesting default access policies..."); + log.info("Ingesting default access policies from: {}...", _policiesResource); // 1. Read from the file into JSON. - final JsonNode policiesObj = mapper.readTree(new ClassPathResource("./boot/policies.json").getFile()); + final JsonNode policiesObj = mapper.readTree(_policiesResource.getFile()); if (!policiesObj.isArray()) { throw new RuntimeException( From 10a190470e8c932b6d34cba49de7dbcba687a088 Mon Sep 17 00:00:00 2001 From: siddiquebagwan-gslab Date: Wed, 11 Oct 2023 08:54:08 +0530 Subject: [PATCH 30/98] feat(ingestion/redshift): CLL support in redshift (#8921) --- .../ingestion/source/redshift/config.py | 4 + .../ingestion/source/redshift/lineage.py | 215 +++++++++++++----- .../ingestion/source/redshift/redshift.py | 1 + .../tests/unit/test_redshift_lineage.py | 95 ++++++-- 4 files changed, 234 insertions(+), 81 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index 804a14b0fe1cf..2789b800940db 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -132,6 +132,10 @@ class RedshiftConfig( description="Whether `schema_pattern` is matched against fully qualified schema name `.`.", ) + extract_column_level_lineage: bool = Field( + default=True, description="Whether to extract column level lineage." + ) + @root_validator(pre=True) def check_email_is_set_on_usage(cls, values): if values.get("include_usage_statistics"): diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py index bbe52b5d98ba3..c9ddfbe92ab2a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py @@ -9,10 +9,12 @@ import humanfriendly import redshift_connector -from sqllineage.runner import LineageRunner +import datahub.emitter.mce_builder as builder +import datahub.utilities.sqlglot_lineage as sqlglot_l from datahub.emitter import mce_builder from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance +from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.aws.s3_util import strip_s3_prefix from datahub.ingestion.source.redshift.common import get_db_name from datahub.ingestion.source.redshift.config import LineageMode, RedshiftConfig @@ -28,13 +30,19 @@ from datahub.ingestion.source.state.redundant_run_skip_handler import ( RedundantLineageRunSkipHandler, ) -from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage +from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( + FineGrainedLineage, + FineGrainedLineageDownstreamType, + FineGrainedLineageUpstreamType, + UpstreamLineage, +) from datahub.metadata.schema_classes import ( DatasetLineageTypeClass, UpstreamClass, UpstreamLineageClass, ) from datahub.utilities import memory_footprint +from datahub.utilities.urns import dataset_urn logger: logging.Logger = logging.getLogger(__name__) @@ -56,13 +64,14 @@ class LineageCollectorType(Enum): @dataclass(frozen=True, eq=True) class LineageDataset: 
platform: LineageDatasetPlatform - path: str + urn: str @dataclass() class LineageItem: dataset: LineageDataset upstreams: Set[LineageDataset] + cll: Optional[List[sqlglot_l.ColumnLineageInfo]] collector_type: LineageCollectorType dataset_lineage_type: str = field(init=False) @@ -83,10 +92,12 @@ def __init__( self, config: RedshiftConfig, report: RedshiftReport, + context: PipelineContext, redundant_run_skip_handler: Optional[RedundantLineageRunSkipHandler] = None, ): self.config = config self.report = report + self.context = context self._lineage_map: Dict[str, LineageItem] = defaultdict() self.redundant_run_skip_handler = redundant_run_skip_handler @@ -121,33 +132,37 @@ def _get_s3_path(self, path: str) -> str: return path - def _get_sources_from_query(self, db_name: str, query: str) -> List[LineageDataset]: + def _get_sources_from_query( + self, db_name: str, query: str + ) -> Tuple[List[LineageDataset], Optional[List[sqlglot_l.ColumnLineageInfo]]]: sources: List[LineageDataset] = list() - parser = LineageRunner(query) + parsed_result: Optional[ + sqlglot_l.SqlParsingResult + ] = sqlglot_l.create_lineage_sql_parsed_result( + query=query, + platform=LineageDatasetPlatform.REDSHIFT.value, + platform_instance=self.config.platform_instance, + database=db_name, + schema=str(self.config.default_schema), + graph=self.context.graph, + env=self.config.env, + ) - for table in parser.source_tables: - split = str(table).split(".") - if len(split) == 3: - db_name, source_schema, source_table = split - elif len(split) == 2: - source_schema, source_table = split - else: - raise ValueError( - f"Invalid table name {table} in query {query}. " - f"Expected format: [db_name].[schema].[table] or [schema].[table] or [table]." - ) + if parsed_result is None: + logger.debug(f"native query parsing failed for {query}") + return sources, None - if source_schema == "": - source_schema = str(self.config.default_schema) + logger.debug(f"parsed_result = {parsed_result}") + for table_urn in parsed_result.in_tables: source = LineageDataset( platform=LineageDatasetPlatform.REDSHIFT, - path=f"{db_name}.{source_schema}.{source_table}", + urn=table_urn, ) sources.append(source) - return sources + return sources, parsed_result.column_lineage def _build_s3_path_from_row(self, filename: str) -> str: path = filename.strip() @@ -165,9 +180,11 @@ def _get_sources( source_table: Optional[str], ddl: Optional[str], filename: Optional[str], - ) -> List[LineageDataset]: + ) -> Tuple[List[LineageDataset], Optional[List[sqlglot_l.ColumnLineageInfo]]]: sources: List[LineageDataset] = list() # Source + cll: Optional[List[sqlglot_l.ColumnLineageInfo]] = None + if ( lineage_type in { @@ -177,7 +194,7 @@ def _get_sources( and ddl is not None ): try: - sources = self._get_sources_from_query(db_name=db_name, query=ddl) + sources, cll = self._get_sources_from_query(db_name=db_name, query=ddl) except Exception as e: logger.warning( f"Error parsing query {ddl} for getting lineage. Error was {e}." @@ -192,22 +209,38 @@ def _get_sources( "Only s3 source supported with copy. The source was: {path}." 
) self.report.num_lineage_dropped_not_support_copy_path += 1 - return sources + return sources, cll path = strip_s3_prefix(self._get_s3_path(path)) + urn = make_dataset_urn_with_platform_instance( + platform=platform.value, + name=path, + env=self.config.env, + platform_instance=self.config.platform_instance_map.get( + platform.value + ) + if self.config.platform_instance_map is not None + else None, + ) elif source_schema is not None and source_table is not None: platform = LineageDatasetPlatform.REDSHIFT path = f"{db_name}.{source_schema}.{source_table}" + urn = make_dataset_urn_with_platform_instance( + platform=platform.value, + platform_instance=self.config.platform_instance, + name=path, + env=self.config.env, + ) else: - return [] + return [], cll sources = [ LineageDataset( platform=platform, - path=path, + urn=urn, ) ] - return sources + return sources, cll def _populate_lineage_map( self, @@ -231,6 +264,7 @@ def _populate_lineage_map( :rtype: None """ try: + cll: Optional[List[sqlglot_l.ColumnLineageInfo]] = None raw_db_name = database alias_db_name = get_db_name(self.config) @@ -243,7 +277,7 @@ def _populate_lineage_map( if not target: continue - sources = self._get_sources( + sources, cll = self._get_sources( lineage_type, alias_db_name, source_schema=lineage_row.source_schema, @@ -251,6 +285,7 @@ def _populate_lineage_map( ddl=lineage_row.ddl, filename=lineage_row.filename, ) + target.cll = cll target.upstreams.update( self._get_upstream_lineages( @@ -262,20 +297,16 @@ def _populate_lineage_map( ) # Merging downstreams if dataset already exists and has downstreams - if target.dataset.path in self._lineage_map: - self._lineage_map[ - target.dataset.path - ].upstreams = self._lineage_map[ - target.dataset.path - ].upstreams.union( - target.upstreams - ) + if target.dataset.urn in self._lineage_map: + self._lineage_map[target.dataset.urn].upstreams = self._lineage_map[ + target.dataset.urn + ].upstreams.union(target.upstreams) else: - self._lineage_map[target.dataset.path] = target + self._lineage_map[target.dataset.urn] = target logger.debug( - f"Lineage[{target}]:{self._lineage_map[target.dataset.path]}" + f"Lineage[{target}]:{self._lineage_map[target.dataset.urn]}" ) except Exception as e: self.warn( @@ -308,17 +339,34 @@ def _get_target_lineage( target_platform = LineageDatasetPlatform.S3 # Following call requires 'filename' key in lineage_row target_path = self._build_s3_path_from_row(lineage_row.filename) + urn = make_dataset_urn_with_platform_instance( + platform=target_platform.value, + name=target_path, + env=self.config.env, + platform_instance=self.config.platform_instance_map.get( + target_platform.value + ) + if self.config.platform_instance_map is not None + else None, + ) except ValueError as e: self.warn(logger, "non-s3-lineage", str(e)) return None else: target_platform = LineageDatasetPlatform.REDSHIFT target_path = f"{alias_db_name}.{lineage_row.target_schema}.{lineage_row.target_table}" + urn = make_dataset_urn_with_platform_instance( + platform=target_platform.value, + platform_instance=self.config.platform_instance, + name=target_path, + env=self.config.env, + ) return LineageItem( - dataset=LineageDataset(platform=target_platform, path=target_path), + dataset=LineageDataset(platform=target_platform, urn=urn), upstreams=set(), collector_type=lineage_type, + cll=None, ) def _get_upstream_lineages( @@ -331,11 +379,22 @@ def _get_upstream_lineages( targe_source = [] for source in sources: if source.platform == LineageDatasetPlatform.REDSHIFT: - db, schema, 
table = source.path.split(".") + qualified_table_name = dataset_urn.DatasetUrn.create_from_string( + source.urn + ).get_entity_id()[1] + db, schema, table = qualified_table_name.split(".") if db == raw_db_name: db = alias_db_name path = f"{db}.{schema}.{table}" - source = LineageDataset(platform=source.platform, path=path) + source = LineageDataset( + platform=source.platform, + urn=make_dataset_urn_with_platform_instance( + platform=LineageDatasetPlatform.REDSHIFT.value, + platform_instance=self.config.platform_instance, + name=path, + env=self.config.env, + ), + ) # Filtering out tables which does not exist in Redshift # It was deleted in the meantime or query parser did not capture well the table name @@ -345,7 +404,7 @@ def _get_upstream_lineages( or not any(table == t.name for t in all_tables[db][schema]) ): logger.debug( - f"{source.path} missing table, dropping from lineage.", + f"{source.urn} missing table, dropping from lineage.", ) self.report.num_lineage_tables_dropped += 1 continue @@ -433,36 +492,73 @@ def populate_lineage( memory_footprint.total_size(self._lineage_map) ) + def make_fine_grained_lineage_class( + self, lineage_item: LineageItem, dataset_urn: str + ) -> List[FineGrainedLineage]: + fine_grained_lineages: List[FineGrainedLineage] = [] + + if ( + self.config.extract_column_level_lineage is False + or lineage_item.cll is None + ): + logger.debug("CLL extraction is disabled") + return fine_grained_lineages + + logger.debug("Extracting column level lineage") + + cll: List[sqlglot_l.ColumnLineageInfo] = lineage_item.cll + + for cll_info in cll: + downstream = ( + [builder.make_schema_field_urn(dataset_urn, cll_info.downstream.column)] + if cll_info.downstream is not None + and cll_info.downstream.column is not None + else [] + ) + + upstreams = [ + builder.make_schema_field_urn(column_ref.table, column_ref.column) + for column_ref in cll_info.upstreams + ] + + fine_grained_lineages.append( + FineGrainedLineage( + downstreamType=FineGrainedLineageDownstreamType.FIELD, + downstreams=downstream, + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + upstreams=upstreams, + ) + ) + + logger.debug(f"Created fine_grained_lineage for {dataset_urn}") + + return fine_grained_lineages + def get_lineage( self, table: Union[RedshiftTable, RedshiftView], dataset_urn: str, schema: RedshiftSchema, ) -> Optional[Tuple[UpstreamLineageClass, Dict[str, str]]]: - dataset_key = mce_builder.dataset_urn_to_key(dataset_urn) - if dataset_key is None: - return None upstream_lineage: List[UpstreamClass] = [] - if dataset_key.name in self._lineage_map: - item = self._lineage_map[dataset_key.name] + cll_lineage: List[FineGrainedLineage] = [] + + if dataset_urn in self._lineage_map: + item = self._lineage_map[dataset_urn] for upstream in item.upstreams: upstream_table = UpstreamClass( - dataset=make_dataset_urn_with_platform_instance( - upstream.platform.value, - upstream.path, - platform_instance=self.config.platform_instance_map.get( - upstream.platform.value - ) - if self.config.platform_instance_map - else None, - env=self.config.env, - ), + dataset=upstream.urn, type=item.dataset_lineage_type, ) upstream_lineage.append(upstream_table) + cll_lineage = self.make_fine_grained_lineage_class( + lineage_item=item, + dataset_urn=dataset_urn, + ) + tablename = table.name if table.type == "EXTERNAL_TABLE": # external_db_params = schema.option @@ -489,7 +585,12 @@ def get_lineage( else: return None - return UpstreamLineage(upstreams=upstream_lineage), {} + return ( + UpstreamLineage( + 
upstreams=upstream_lineage, fineGrainedLineages=cll_lineage or None + ), + {}, + ) def report_status(self, step: str, status: bool) -> None: if self.redundant_run_skip_handler: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index e8a8ff976afa6..a1b6333a3775d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -881,6 +881,7 @@ def extract_lineage( self.lineage_extractor = RedshiftLineageExtractor( config=self.config, report=self.report, + context=self.ctx, redundant_run_skip_handler=self.redundant_lineage_run_skip_handler, ) diff --git a/metadata-ingestion/tests/unit/test_redshift_lineage.py b/metadata-ingestion/tests/unit/test_redshift_lineage.py index c7d6ac18e044c..db5af3a71efb9 100644 --- a/metadata-ingestion/tests/unit/test_redshift_lineage.py +++ b/metadata-ingestion/tests/unit/test_redshift_lineage.py @@ -1,6 +1,8 @@ +from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.redshift.config import RedshiftConfig from datahub.ingestion.source.redshift.lineage import RedshiftLineageExtractor from datahub.ingestion.source.redshift.report import RedshiftReport +from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, DownstreamColumnRef def test_get_sources_from_query(): @@ -10,14 +12,20 @@ def test_get_sources_from_query(): test_query = """ select * from my_schema.my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test.my_schema.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test.my_schema.my_table,PROD)" + ) def test_get_sources_from_query_with_only_table_name(): @@ -27,14 +35,20 @@ def test_get_sources_from_query_with_only_table_name(): test_query = """ select * from my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test.public.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test.public.my_table,PROD)" + ) def test_get_sources_from_query_with_database(): @@ -44,14 +58,20 @@ def test_get_sources_from_query_with_database(): test_query = """ select * from test.my_schema.my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test.my_schema.my_table" + + assert ( + lineage.urn + == 
"urn:li:dataset:(urn:li:dataPlatform:redshift,test.my_schema.my_table,PROD)" + ) def test_get_sources_from_query_with_non_default_database(): @@ -61,14 +81,20 @@ def test_get_sources_from_query_with_non_default_database(): test_query = """ select * from test2.my_schema.my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test2.my_schema.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test2.my_schema.my_table,PROD)" + ) def test_get_sources_from_query_with_only_table(): @@ -78,27 +104,48 @@ def test_get_sources_from_query_with_only_table(): test_query = """ select * from my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test.public.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test.public.my_table,PROD)" + ) -def test_get_sources_from_query_with_four_part_table_should_throw_exception(): +def test_cll(): config = RedshiftConfig(host_port="localhost:5439", database="test") report = RedshiftReport() test_query = """ - select * from database.schema.my_table.test + select a,b,c from db.public.customer inner join db.public.order on db.public.customer.id = db.public.order.customer_id """ - lineage_extractor = RedshiftLineageExtractor(config, report) - try: - lineage_extractor._get_sources_from_query(db_name="test", query=test_query) - except ValueError: - pass - - assert f"{test_query} should have thrown a ValueError exception but it didn't" + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + _, cll = lineage_extractor._get_sources_from_query(db_name="db", query=test_query) + + assert cll == [ + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="a"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="b"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="c"), + upstreams=[], + logic=None, + ), + ] From 4b6b941a2abf13854511c9af0e88a17d5acfd5e6 Mon Sep 17 00:00:00 2001 From: Harsha Mandadi <115464537+harsha-mandadi-4026@users.noreply.github.com> Date: Wed, 11 Oct 2023 19:01:46 +0100 Subject: [PATCH 31/98] fix(ingest): Fix postgres lineage within views (#8906) Co-authored-by: Harshal Sheth Co-authored-by: Maggie Hays --- .../datahub/ingestion/source/sql/postgres.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py index ba8655b83446d..a6a9d8e2c8597 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py @@ -217,14 +217,15 @@ def 
_get_view_lineage_elements( key = (lineage.dependent_view, lineage.dependent_schema) # Append the source table to the list. lineage_elements[key].append( - mce_builder.make_dataset_urn( - self.platform, - self.get_identifier( + mce_builder.make_dataset_urn_with_platform_instance( + platform=self.platform, + name=self.get_identifier( schema=lineage.source_schema, entity=lineage.source_table, inspector=inspector, ), - self.config.env, + platform_instance=self.config.platform_instance, + env=self.config.env, ) ) @@ -244,12 +245,13 @@ def _get_view_lineage_workunits( dependent_view, dependent_schema = key # Construct a lineage object. - urn = mce_builder.make_dataset_urn( - self.platform, - self.get_identifier( + urn = mce_builder.make_dataset_urn_with_platform_instance( + platform=self.platform, + name=self.get_identifier( schema=dependent_schema, entity=dependent_view, inspector=inspector ), - self.config.env, + platform_instance=self.config.platform_instance, + env=self.config.env, ) # use the mce_builder to ensure that the change proposal inherits From 932fbcddbf7c3201898e0918218e80c9246b0cd2 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 11 Oct 2023 14:17:02 -0400 Subject: [PATCH 32/98] refactor(ingest/dbt): move dbt tests logic to dedicated file (#8984) --- .../src/datahub/ingestion/api/common.py | 9 + .../datahub/ingestion/source/csv_enricher.py | 8 +- .../datahub/ingestion/source/dbt/dbt_cloud.py | 3 +- .../ingestion/source/dbt/dbt_common.py | 278 +----------------- .../datahub/ingestion/source/dbt/dbt_core.py | 3 +- .../datahub/ingestion/source/dbt/dbt_tests.py | 261 ++++++++++++++++ 6 files changed, 288 insertions(+), 274 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py diff --git a/metadata-ingestion/src/datahub/ingestion/api/common.py b/metadata-ingestion/src/datahub/ingestion/api/common.py index 778bd119615e2..a6761a3c77d5e 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/common.py +++ b/metadata-ingestion/src/datahub/ingestion/api/common.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, Generic, Iterable, Optional, Tuple, TypeVar +from datahub.configuration.common import ConfigurationError from datahub.emitter.mce_builder import set_dataset_urn_to_lower from datahub.ingestion.api.committable import Committable from datahub.ingestion.graph.client import DataHubGraph @@ -75,3 +76,11 @@ def register_checkpointer(self, committable: Committable) -> None: def get_committables(self) -> Iterable[Tuple[str, Committable]]: yield from self.checkpointers.items() + + def require_graph(self, operation: Optional[str] = None) -> DataHubGraph: + if not self.graph: + raise ConfigurationError( + f"{operation or 'This operation'} requires a graph, but none was provided. " + "To provide one, either use the datahub-rest sink or set the top-level datahub_api config in the recipe." + ) + return self.graph diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py index 7cb487a86d931..611f0c5c52cc6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py +++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py @@ -129,11 +129,9 @@ def __init__(self, config: CSVEnricherConfig, ctx: PipelineContext): # Map from entity urn to a list of SubResourceRow. 
self.editable_schema_metadata_map: Dict[str, List[SubResourceRow]] = {} self.should_overwrite: bool = self.config.write_semantics == "OVERRIDE" - if not self.should_overwrite and not self.ctx.graph: - raise ConfigurationError( - "With PATCH semantics, the csv-enricher source requires a datahub_api to connect to. " - "Consider using the datahub-rest sink or provide a datahub_api: configuration on your ingestion recipe." - ) + + if not self.should_overwrite: + self.ctx.require_graph(operation="The csv-enricher's PATCH semantics flag") def get_resource_glossary_terms_work_unit( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py index af9769bc9d94c..da1ea8ecb4678 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py @@ -20,9 +20,8 @@ DBTCommonConfig, DBTNode, DBTSourceBase, - DBTTest, - DBTTestResult, ) +from datahub.ingestion.source.dbt.dbt_tests import DBTTest, DBTTestResult logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 0f5c08eb6ac54..48d2118a9b091 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -1,11 +1,10 @@ -import json import logging import re from abc import abstractmethod from dataclasses import dataclass, field from datetime import datetime from enum import auto -from typing import Any, Callable, ClassVar, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple import pydantic from pydantic import root_validator, validator @@ -34,6 +33,12 @@ from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.common.subtypes import DatasetSubTypes +from datahub.ingestion.source.dbt.dbt_tests import ( + DBTTest, + DBTTestResult, + make_assertion_from_test, + make_assertion_result_from_test, +) from datahub.ingestion.source.sql.sql_types import ( ATHENA_SQL_TYPES_MAP, BIGQUERY_TYPES_MAP, @@ -81,20 +86,7 @@ TimeTypeClass, ) from datahub.metadata.schema_classes import ( - AssertionInfoClass, - AssertionResultClass, - AssertionResultTypeClass, - AssertionRunEventClass, - AssertionRunStatusClass, - AssertionStdAggregationClass, - AssertionStdOperatorClass, - AssertionStdParameterClass, - AssertionStdParametersClass, - AssertionStdParameterTypeClass, - AssertionTypeClass, DataPlatformInstanceClass, - DatasetAssertionInfoClass, - DatasetAssertionScopeClass, DatasetPropertiesClass, GlobalTagsClass, GlossaryTermsClass, @@ -551,134 +543,6 @@ def get_column_type( return SchemaFieldDataType(type=TypeClass()) -@dataclass -class AssertionParams: - scope: Union[DatasetAssertionScopeClass, str] - operator: Union[AssertionStdOperatorClass, str] - aggregation: Union[AssertionStdAggregationClass, str] - parameters: Optional[Callable[[Dict[str, str]], AssertionStdParametersClass]] = None - logic_fn: Optional[Callable[[Dict[str, str]], Optional[str]]] = None - - -def _get_name_for_relationship_test(kw_args: Dict[str, str]) -> Optional[str]: - """ - Try to produce a useful string for the name of a relationship constraint. 
- Return None if we fail to - """ - destination_ref = kw_args.get("to") - source_ref = kw_args.get("model") - column_name = kw_args.get("column_name") - dest_field_name = kw_args.get("field") - if not destination_ref or not source_ref or not column_name or not dest_field_name: - # base assertions are violated, bail early - return None - m = re.match(r"^ref\(\'(.*)\'\)$", destination_ref) - if m: - destination_table = m.group(1) - else: - destination_table = destination_ref - m = re.search(r"ref\(\'(.*)\'\)", source_ref) - if m: - source_table = m.group(1) - else: - source_table = source_ref - return f"{source_table}.{column_name} referential integrity to {destination_table}.{dest_field_name}" - - -@dataclass -class DBTTest: - qualified_test_name: str - column_name: Optional[str] - kw_args: dict - - TEST_NAME_TO_ASSERTION_MAP: ClassVar[Dict[str, AssertionParams]] = { - "not_null": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.NOT_NULL, - aggregation=AssertionStdAggregationClass.IDENTITY, - ), - "unique": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.EQUAL_TO, - aggregation=AssertionStdAggregationClass.UNIQUE_PROPOTION, - parameters=lambda _: AssertionStdParametersClass( - value=AssertionStdParameterClass( - value="1.0", - type=AssertionStdParameterTypeClass.NUMBER, - ) - ), - ), - "accepted_values": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.IN, - aggregation=AssertionStdAggregationClass.IDENTITY, - parameters=lambda kw_args: AssertionStdParametersClass( - value=AssertionStdParameterClass( - value=json.dumps(kw_args.get("values")), - type=AssertionStdParameterTypeClass.SET, - ), - ), - ), - "relationships": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass._NATIVE_, - aggregation=AssertionStdAggregationClass.IDENTITY, - parameters=lambda kw_args: AssertionStdParametersClass( - value=AssertionStdParameterClass( - value=json.dumps(kw_args.get("values")), - type=AssertionStdParameterTypeClass.SET, - ), - ), - logic_fn=_get_name_for_relationship_test, - ), - "dbt_expectations.expect_column_values_to_not_be_null": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.NOT_NULL, - aggregation=AssertionStdAggregationClass.IDENTITY, - ), - "dbt_expectations.expect_column_values_to_be_between": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.BETWEEN, - aggregation=AssertionStdAggregationClass.IDENTITY, - parameters=lambda x: AssertionStdParametersClass( - minValue=AssertionStdParameterClass( - value=str(x.get("min_value", "unknown")), - type=AssertionStdParameterTypeClass.NUMBER, - ), - maxValue=AssertionStdParameterClass( - value=str(x.get("max_value", "unknown")), - type=AssertionStdParameterTypeClass.NUMBER, - ), - ), - ), - "dbt_expectations.expect_column_values_to_be_in_set": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.IN, - aggregation=AssertionStdAggregationClass.IDENTITY, - parameters=lambda kw_args: AssertionStdParametersClass( - value=AssertionStdParameterClass( - value=json.dumps(kw_args.get("value_set")), - type=AssertionStdParameterTypeClass.SET, - ), - ), - ), - } - - -@dataclass -class DBTTestResult: - invocation_id: str - - status: str - execution_time: datetime - - native_results: 
Dict[str, str] - - -def string_map(input_map: Dict[str, Any]) -> Dict[str, str]: - return {k: str(v) for k, v in input_map.items()} - - @platform_name("dbt") @config_class(DBTCommonConfig) @support_status(SupportStatus.CERTIFIED) @@ -750,7 +614,7 @@ def create_test_entity_mcps( for upstream_urn in sorted(upstream_urns): if self.config.entities_enabled.can_emit_node_type("test"): - yield self._make_assertion_from_test( + yield make_assertion_from_test( custom_props, node, assertion_urn, @@ -759,133 +623,17 @@ def create_test_entity_mcps( if node.test_result: if self.config.entities_enabled.can_emit_test_results: - yield self._make_assertion_result_from_test( - node, assertion_urn, upstream_urn + yield make_assertion_result_from_test( + node, + assertion_urn, + upstream_urn, + test_warnings_are_errors=self.config.test_warnings_are_errors, ) else: logger.debug( f"Skipping test result {node.name} emission since it is turned off." ) - def _make_assertion_from_test( - self, - extra_custom_props: Dict[str, str], - node: DBTNode, - assertion_urn: str, - upstream_urn: str, - ) -> MetadataWorkUnit: - assert node.test_info - qualified_test_name = node.test_info.qualified_test_name - column_name = node.test_info.column_name - kw_args = node.test_info.kw_args - - if qualified_test_name in DBTTest.TEST_NAME_TO_ASSERTION_MAP: - assertion_params = DBTTest.TEST_NAME_TO_ASSERTION_MAP[qualified_test_name] - assertion_info = AssertionInfoClass( - type=AssertionTypeClass.DATASET, - customProperties=extra_custom_props, - datasetAssertion=DatasetAssertionInfoClass( - dataset=upstream_urn, - scope=assertion_params.scope, - operator=assertion_params.operator, - fields=[ - mce_builder.make_schema_field_urn(upstream_urn, column_name) - ] - if ( - assertion_params.scope - == DatasetAssertionScopeClass.DATASET_COLUMN - and column_name - ) - else [], - nativeType=node.name, - aggregation=assertion_params.aggregation, - parameters=assertion_params.parameters(kw_args) - if assertion_params.parameters - else None, - logic=assertion_params.logic_fn(kw_args) - if assertion_params.logic_fn - else None, - nativeParameters=string_map(kw_args), - ), - ) - elif column_name: - # no match with known test types, column-level test - assertion_info = AssertionInfoClass( - type=AssertionTypeClass.DATASET, - customProperties=extra_custom_props, - datasetAssertion=DatasetAssertionInfoClass( - dataset=upstream_urn, - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass._NATIVE_, - fields=[ - mce_builder.make_schema_field_urn(upstream_urn, column_name) - ], - nativeType=node.name, - logic=node.compiled_code or node.raw_code, - aggregation=AssertionStdAggregationClass._NATIVE_, - nativeParameters=string_map(kw_args), - ), - ) - else: - # no match with known test types, default to row-level test - assertion_info = AssertionInfoClass( - type=AssertionTypeClass.DATASET, - customProperties=extra_custom_props, - datasetAssertion=DatasetAssertionInfoClass( - dataset=upstream_urn, - scope=DatasetAssertionScopeClass.DATASET_ROWS, - operator=AssertionStdOperatorClass._NATIVE_, - logic=node.compiled_code or node.raw_code, - nativeType=node.name, - aggregation=AssertionStdAggregationClass._NATIVE_, - nativeParameters=string_map(kw_args), - ), - ) - - wu = MetadataChangeProposalWrapper( - entityUrn=assertion_urn, - aspect=assertion_info, - ).as_workunit() - - return wu - - def _make_assertion_result_from_test( - self, - node: DBTNode, - assertion_urn: str, - upstream_urn: str, - ) -> MetadataWorkUnit: - assert 
node.test_result - test_result = node.test_result - - assertionResult = AssertionRunEventClass( - timestampMillis=int(test_result.execution_time.timestamp() * 1000.0), - assertionUrn=assertion_urn, - asserteeUrn=upstream_urn, - runId=test_result.invocation_id, - result=AssertionResultClass( - type=AssertionResultTypeClass.SUCCESS - if test_result.status == "pass" - or ( - not self.config.test_warnings_are_errors - and test_result.status == "warn" - ) - else AssertionResultTypeClass.FAILURE, - nativeResults=test_result.native_results, - ), - status=AssertionRunStatusClass.COMPLETE, - ) - - event = MetadataChangeProposalWrapper( - entityUrn=assertion_urn, - aspect=assertionResult, - ) - wu = MetadataWorkUnit( - id=f"{assertion_urn}-assertionRunEvent-{upstream_urn}", - mcp=event, - ) - return wu - @abstractmethod def load_nodes(self) -> Tuple[List[DBTNode], Dict[str, Optional[str]]]: # return dbt nodes + global custom properties diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py index c08295ed1dc59..dc3a84847beb2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py @@ -26,9 +26,8 @@ DBTNode, DBTSourceBase, DBTSourceReport, - DBTTest, - DBTTestResult, ) +from datahub.ingestion.source.dbt.dbt_tests import DBTTest, DBTTestResult logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py new file mode 100644 index 0000000000000..721769d214d9e --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py @@ -0,0 +1,261 @@ +import json +import re +from dataclasses import dataclass +from datetime import datetime +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union + +from datahub.emitter import mce_builder +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionResultClass, + AssertionResultTypeClass, + AssertionRunEventClass, + AssertionRunStatusClass, + AssertionStdAggregationClass, + AssertionStdOperatorClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionStdParameterTypeClass, + AssertionTypeClass, + DatasetAssertionInfoClass, + DatasetAssertionScopeClass, +) + +if TYPE_CHECKING: + from datahub.ingestion.source.dbt.dbt_common import DBTNode + + +@dataclass +class DBTTest: + qualified_test_name: str + column_name: Optional[str] + kw_args: dict + + +@dataclass +class DBTTestResult: + invocation_id: str + + status: str + execution_time: datetime + + native_results: Dict[str, str] + + +def _get_name_for_relationship_test(kw_args: Dict[str, str]) -> Optional[str]: + """ + Try to produce a useful string for the name of a relationship constraint. 
+ Return None if we fail to + """ + destination_ref = kw_args.get("to") + source_ref = kw_args.get("model") + column_name = kw_args.get("column_name") + dest_field_name = kw_args.get("field") + if not destination_ref or not source_ref or not column_name or not dest_field_name: + # base assertions are violated, bail early + return None + m = re.match(r"^ref\(\'(.*)\'\)$", destination_ref) + if m: + destination_table = m.group(1) + else: + destination_table = destination_ref + m = re.search(r"ref\(\'(.*)\'\)", source_ref) + if m: + source_table = m.group(1) + else: + source_table = source_ref + return f"{source_table}.{column_name} referential integrity to {destination_table}.{dest_field_name}" + + +@dataclass +class AssertionParams: + scope: Union[DatasetAssertionScopeClass, str] + operator: Union[AssertionStdOperatorClass, str] + aggregation: Union[AssertionStdAggregationClass, str] + parameters: Optional[Callable[[Dict[str, str]], AssertionStdParametersClass]] = None + logic_fn: Optional[Callable[[Dict[str, str]], Optional[str]]] = None + + +_DBT_TEST_NAME_TO_ASSERTION_MAP: Dict[str, AssertionParams] = { + "not_null": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.NOT_NULL, + aggregation=AssertionStdAggregationClass.IDENTITY, + ), + "unique": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.EQUAL_TO, + aggregation=AssertionStdAggregationClass.UNIQUE_PROPOTION, + parameters=lambda _: AssertionStdParametersClass( + value=AssertionStdParameterClass( + value="1.0", + type=AssertionStdParameterTypeClass.NUMBER, + ) + ), + ), + "accepted_values": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.IN, + aggregation=AssertionStdAggregationClass.IDENTITY, + parameters=lambda kw_args: AssertionStdParametersClass( + value=AssertionStdParameterClass( + value=json.dumps(kw_args.get("values")), + type=AssertionStdParameterTypeClass.SET, + ), + ), + ), + "relationships": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass._NATIVE_, + aggregation=AssertionStdAggregationClass.IDENTITY, + parameters=lambda kw_args: AssertionStdParametersClass( + value=AssertionStdParameterClass( + value=json.dumps(kw_args.get("values")), + type=AssertionStdParameterTypeClass.SET, + ), + ), + logic_fn=_get_name_for_relationship_test, + ), + "dbt_expectations.expect_column_values_to_not_be_null": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.NOT_NULL, + aggregation=AssertionStdAggregationClass.IDENTITY, + ), + "dbt_expectations.expect_column_values_to_be_between": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.BETWEEN, + aggregation=AssertionStdAggregationClass.IDENTITY, + parameters=lambda x: AssertionStdParametersClass( + minValue=AssertionStdParameterClass( + value=str(x.get("min_value", "unknown")), + type=AssertionStdParameterTypeClass.NUMBER, + ), + maxValue=AssertionStdParameterClass( + value=str(x.get("max_value", "unknown")), + type=AssertionStdParameterTypeClass.NUMBER, + ), + ), + ), + "dbt_expectations.expect_column_values_to_be_in_set": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.IN, + aggregation=AssertionStdAggregationClass.IDENTITY, + parameters=lambda kw_args: AssertionStdParametersClass( + 
value=AssertionStdParameterClass( + value=json.dumps(kw_args.get("value_set")), + type=AssertionStdParameterTypeClass.SET, + ), + ), + ), +} + + +def _string_map(input_map: Dict[str, Any]) -> Dict[str, str]: + return {k: str(v) for k, v in input_map.items()} + + +def make_assertion_from_test( + extra_custom_props: Dict[str, str], + node: "DBTNode", + assertion_urn: str, + upstream_urn: str, +) -> MetadataWorkUnit: + assert node.test_info + qualified_test_name = node.test_info.qualified_test_name + column_name = node.test_info.column_name + kw_args = node.test_info.kw_args + + if qualified_test_name in _DBT_TEST_NAME_TO_ASSERTION_MAP: + assertion_params = _DBT_TEST_NAME_TO_ASSERTION_MAP[qualified_test_name] + assertion_info = AssertionInfoClass( + type=AssertionTypeClass.DATASET, + customProperties=extra_custom_props, + datasetAssertion=DatasetAssertionInfoClass( + dataset=upstream_urn, + scope=assertion_params.scope, + operator=assertion_params.operator, + fields=[mce_builder.make_schema_field_urn(upstream_urn, column_name)] + if ( + assertion_params.scope == DatasetAssertionScopeClass.DATASET_COLUMN + and column_name + ) + else [], + nativeType=node.name, + aggregation=assertion_params.aggregation, + parameters=assertion_params.parameters(kw_args) + if assertion_params.parameters + else None, + logic=assertion_params.logic_fn(kw_args) + if assertion_params.logic_fn + else None, + nativeParameters=_string_map(kw_args), + ), + ) + elif column_name: + # no match with known test types, column-level test + assertion_info = AssertionInfoClass( + type=AssertionTypeClass.DATASET, + customProperties=extra_custom_props, + datasetAssertion=DatasetAssertionInfoClass( + dataset=upstream_urn, + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass._NATIVE_, + fields=[mce_builder.make_schema_field_urn(upstream_urn, column_name)], + nativeType=node.name, + logic=node.compiled_code or node.raw_code, + aggregation=AssertionStdAggregationClass._NATIVE_, + nativeParameters=_string_map(kw_args), + ), + ) + else: + # no match with known test types, default to row-level test + assertion_info = AssertionInfoClass( + type=AssertionTypeClass.DATASET, + customProperties=extra_custom_props, + datasetAssertion=DatasetAssertionInfoClass( + dataset=upstream_urn, + scope=DatasetAssertionScopeClass.DATASET_ROWS, + operator=AssertionStdOperatorClass._NATIVE_, + logic=node.compiled_code or node.raw_code, + nativeType=node.name, + aggregation=AssertionStdAggregationClass._NATIVE_, + nativeParameters=_string_map(kw_args), + ), + ) + + return MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + aspect=assertion_info, + ).as_workunit() + + +def make_assertion_result_from_test( + node: "DBTNode", + assertion_urn: str, + upstream_urn: str, + test_warnings_are_errors: bool, +) -> MetadataWorkUnit: + assert node.test_result + test_result = node.test_result + + assertionResult = AssertionRunEventClass( + timestampMillis=int(test_result.execution_time.timestamp() * 1000.0), + assertionUrn=assertion_urn, + asserteeUrn=upstream_urn, + runId=test_result.invocation_id, + result=AssertionResultClass( + type=AssertionResultTypeClass.SUCCESS + if test_result.status == "pass" + or (not test_warnings_are_errors and test_result.status == "warn") + else AssertionResultTypeClass.FAILURE, + nativeResults=test_result.native_results, + ), + status=AssertionRunStatusClass.COMPLETE, + ) + + return MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + aspect=assertionResult, + ).as_workunit() From 
1b06c6a30c8d6c0ee57f75f75ee6a436aa6c13a7 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 12 Oct 2023 00:31:42 +0530 Subject: [PATCH 33/98] fix(ingest/snowflake): fix sample fraction for very large tables (#8988) --- .../datahub/ingestion/source/snowflake/snowflake_profiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py index 24275dcdff34d..8e18d85d6f3ca 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py @@ -86,7 +86,7 @@ def get_batch_kwargs( # Fixed-size sampling can be slower than equivalent fraction-based sampling # as per https://docs.snowflake.com/en/sql-reference/constructs/sample#performance-considerations sample_pc = 100 * self.config.profiling.sample_size / table.rows_count - custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.3f})' + custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.8f})' return { **super().get_batch_kwargs(table, schema_name, db_name), # Lowercase/Mixedcase table names in Snowflake do not work by default. From 245284ec6c6b754b22943ba42d7139ddd5772377 Mon Sep 17 00:00:00 2001 From: jayasimhankv <145704974+jayasimhankv@users.noreply.github.com> Date: Wed, 11 Oct 2023 17:40:20 -0500 Subject: [PATCH 34/98] fix(): Display generic not found page for corp groups that do not exist (#8880) Co-authored-by: Jay Kadambi --- .../java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java | 3 ++- datahub-graphql-core/src/main/resources/entity.graphql | 5 +++++ datahub-web-react/src/app/entity/group/GroupProfile.tsx | 4 ++++ datahub-web-react/src/graphql/group.graphql | 1 + 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index ebb5c7d62c7d3..b99f712034fe0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -1292,7 +1292,8 @@ private void configureCorpUserResolvers(final RuntimeWiring.Builder builder) { */ private void configureCorpGroupResolvers(final RuntimeWiring.Builder builder) { builder.type("CorpGroup", typeWiring -> typeWiring - .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient))); + .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)) + .dataFetcher("exists", new EntityExistsResolver(entityService))); builder.type("CorpGroupInfo", typeWiring -> typeWiring .dataFetcher("admins", new LoadableTypeBatchResolver<>(corpUserType, diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 0b15d7b875a9c..b37a8f34fa056 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -3788,6 +3788,11 @@ type CorpGroup implements Entity { Additional read only info about the group """ info: CorpGroupInfo @deprecated + + """ + Whether or not this entity exists on DataHub + """ + exists: Boolean } """ diff --git 
a/datahub-web-react/src/app/entity/group/GroupProfile.tsx b/datahub-web-react/src/app/entity/group/GroupProfile.tsx index d5e284af931df..53d2062277dec 100644 --- a/datahub-web-react/src/app/entity/group/GroupProfile.tsx +++ b/datahub-web-react/src/app/entity/group/GroupProfile.tsx @@ -11,6 +11,7 @@ import { RoutedTabs } from '../../shared/RoutedTabs'; import GroupInfoSidebar from './GroupInfoSideBar'; import { GroupAssets } from './GroupAssets'; import { ErrorSection } from '../../shared/error/ErrorSection'; +import NonExistentEntityPage from '../shared/entity/NonExistentEntityPage'; const messageStyle = { marginTop: '10%' }; @@ -110,6 +111,9 @@ export default function GroupProfile() { urn, }; + if (data?.corpGroup?.exists === false) { + return ; + } return ( <> {error && } diff --git a/datahub-web-react/src/graphql/group.graphql b/datahub-web-react/src/graphql/group.graphql index 9aa6e2b005f16..1007721e51a4e 100644 --- a/datahub-web-react/src/graphql/group.graphql +++ b/datahub-web-react/src/graphql/group.graphql @@ -3,6 +3,7 @@ query getGroup($urn: String!, $membersCount: Int!) { urn type name + exists origin { type externalType From 245c5c00087116d236acf7a9bbddbdb4dee15949 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Thu, 12 Oct 2023 02:06:19 +0200 Subject: [PATCH 35/98] fix(ingest/looker): stop emitting tag owner (#8942) --- docs/how/updating-datahub.md | 2 + .../ingestion/source/looker/looker_common.py | 13 +----- .../looker/golden_looker_mces.json | 42 ------------------- .../looker/golden_test_allow_ingest.json | 42 ------------------- ...olden_test_external_project_view_mces.json | 42 ------------------- .../looker/golden_test_file_path_ingest.json | 42 ------------------- .../golden_test_independent_look_ingest.json | 42 ------------------- .../looker/golden_test_ingest.json | 42 ------------------- .../looker/golden_test_ingest_joins.json | 42 ------------------- .../golden_test_ingest_unaliased_joins.json | 42 ------------------- .../looker_mces_golden_deleted_stateful.json | 42 ------------------- .../looker/looker_mces_usage_history.json | 42 ------------------- .../lookml/lookml_mces_api_bigquery.json | 42 ------------------- .../lookml/lookml_mces_api_hive2.json | 42 ------------------- .../lookml/lookml_mces_badsql_parser.json | 42 ------------------- .../lookml/lookml_mces_offline.json | 42 ------------------- .../lookml_mces_offline_deny_pattern.json | 42 ------------------- ...lookml_mces_offline_platform_instance.json | 42 ------------------- .../lookml_mces_with_external_urls.json | 42 ------------------- .../lookml/lookml_reachable_views.json | 42 ------------------- 20 files changed, 3 insertions(+), 768 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 5d0ad5eaf8f7e..9cd4ad5c6f02d 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -7,6 +7,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes - #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. +- #8942 - Removed `urn:li:corpuser:datahub` owner for the `Measure`, `Dimension` and `Temporal` tags emitted + by Looker and LookML source connectors. - #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details. - #8853 - Introduced the Airflow plugin v2. 
If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`. - #8943 The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 89b1e45695c57..30c38720dd96c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -81,9 +81,6 @@ EnumTypeClass, FineGrainedLineageClass, GlobalTagsClass, - OwnerClass, - OwnershipClass, - OwnershipTypeClass, SchemaMetadataClass, StatusClass, SubTypesClass, @@ -453,17 +450,9 @@ def _get_schema( @staticmethod def _get_tag_mce_for_urn(tag_urn: str) -> MetadataChangeEvent: assert tag_urn in LookerUtil.tag_definitions - ownership = OwnershipClass( - owners=[ - OwnerClass( - owner="urn:li:corpuser:datahub", - type=OwnershipTypeClass.DATAOWNER, - ) - ] - ) return MetadataChangeEvent( proposedSnapshot=TagSnapshotClass( - urn=tag_urn, aspects=[ownership, LookerUtil.tag_definitions[tag_urn]] + urn=tag_urn, aspects=[LookerUtil.tag_definitions[tag_urn]] ) ) diff --git a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json index dee85b40bb7a8..1da42b94e320c 100644 --- a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json @@ -533,20 +533,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -566,20 +552,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -599,20 +571,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json index 72db36e63daf7..685a606a57c33 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json @@ -327,20 +327,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", 
"aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -360,20 +346,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -393,20 +365,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json index e5508bdb06b9e..069788cb088ac 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json @@ -327,20 +327,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -360,20 +346,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -393,20 +365,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json index b0f66e7b245c9..f1c932ebd5a70 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json @@ -335,20 +335,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } 
- }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -369,20 +355,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -403,20 +375,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json index 91e13debfa028..9521c9af4bbdc 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json @@ -550,20 +550,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -583,20 +569,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -616,20 +588,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index e93079119e4f4..dbacd52fe83de 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -327,20 +327,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -360,20 +346,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": 
"urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -393,20 +365,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json index a9c8efa7cdb98..aaa874d9ff348 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json @@ -351,20 +351,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -384,20 +370,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -417,20 +389,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json index edd15624a14cd..be8db0722aea3 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json @@ -343,20 +343,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -376,20 +362,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -409,20 +381,6 @@ 
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json index aebc89b609a08..05b74f163ad45 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json @@ -327,20 +327,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -360,20 +346,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -393,20 +365,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json index 34bded3cf691e..0778aa0050b00 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json @@ -279,20 +279,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -312,20 +298,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -345,20 +317,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - 
"lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json index 238f4c2580cdf..5a0bd4e12fd3a 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json @@ -2121,20 +2121,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2154,20 +2140,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2187,20 +2159,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json index 45d5d839e9d21..1b0ee3216383c 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json @@ -2121,20 +2121,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2154,20 +2140,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2187,20 +2159,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json 
b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json index 187cedaefb6b2..b960ba581e6b5 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json @@ -2004,20 +2004,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2037,20 +2023,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2070,20 +2042,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json index c2c879e38f37b..e29292a44c949 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json @@ -2121,20 +2121,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2154,20 +2140,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2187,20 +2159,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json index c1ac54b0fb588..04ecaecbd4afb 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json 
@@ -584,20 +584,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -617,20 +603,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -650,20 +622,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json index f602ca37b3160..080931ae637bc 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json @@ -2121,20 +2121,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2154,20 +2140,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2187,20 +2159,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json index 104bd365669e3..5826c4316b539 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json @@ -2134,20 +2134,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": 
"urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2167,20 +2153,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2200,20 +2172,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json b/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json index 37a6c94c6952e..53d1ec0229de1 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json @@ -681,20 +681,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -714,20 +700,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -747,20 +719,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", From 84bba4dc446ee97f8991689fd17bfa6d14232601 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 12 Oct 2023 01:31:17 -0400 Subject: [PATCH 36/98] feat(ingest): add output schema inference for sql parser (#8989) --- .../src/datahub/utilities/sqlglot_lineage.py | 119 ++++++++++++++++-- .../integration/powerbi/test_m_parser.py | 93 ++++---------- .../test_bigquery_create_view_with_cte.json | 32 ++++- ..._bigquery_from_sharded_table_wildcard.json | 16 ++- .../test_bigquery_nested_subqueries.json | 16 ++- ..._bigquery_sharded_table_normalization.json | 16 ++- .../test_bigquery_star_with_replace.json | 24 +++- .../test_bigquery_view_from_union.json | 16 ++- .../goldens/test_create_view_as_select.json | 16 ++- .../test_expand_select_star_basic.json | 80 ++++++++++-- .../goldens/test_insert_as_select.json | 36 +++++- ...est_select_ambiguous_column_no_schema.json | 12 +- 
.../goldens/test_select_count.json | 8 +- .../test_select_from_struct_subfields.json | 16 ++- .../goldens/test_select_from_union.json | 16 ++- .../sql_parsing/goldens/test_select_max.json | 4 +- .../goldens/test_select_with_ctes.json | 8 +- .../test_select_with_full_col_name.json | 12 +- .../test_snowflake_case_statement.json | 16 ++- .../goldens/test_snowflake_column_cast.json | 63 ++++++++++ .../test_snowflake_column_normalization.json | 32 ++++- ...t_snowflake_ctas_column_normalization.json | 32 ++++- .../test_snowflake_default_normalization.json | 48 ++++++- .../unit/sql_parsing/test_sqlglot_lineage.py | 21 ++++ 24 files changed, 604 insertions(+), 148 deletions(-) create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 81c43884fdf7d..349eb40a5e865 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -5,12 +5,13 @@ import logging import pathlib from collections import defaultdict -from typing import Dict, List, Optional, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Union import pydantic.dataclasses import sqlglot import sqlglot.errors import sqlglot.lineage +import sqlglot.optimizer.annotate_types import sqlglot.optimizer.qualify import sqlglot.optimizer.qualify_columns from pydantic import BaseModel @@ -23,7 +24,17 @@ from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier -from datahub.metadata.schema_classes import OperationTypeClass, SchemaMetadataClass +from datahub.metadata.schema_classes import ( + ArrayTypeClass, + BooleanTypeClass, + DateTypeClass, + NumberTypeClass, + OperationTypeClass, + SchemaFieldDataTypeClass, + SchemaMetadataClass, + StringTypeClass, + TimeTypeClass, +) from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict from datahub.utilities.urns.dataset_urn import DatasetUrn @@ -90,8 +101,18 @@ def get_query_type_of_sql(expression: sqlglot.exp.Expression) -> QueryType: return QueryType.UNKNOWN +class _ParserBaseModel( + BaseModel, + arbitrary_types_allowed=True, + json_encoders={ + SchemaFieldDataTypeClass: lambda v: v.to_obj(), + }, +): + pass + + @functools.total_ordering -class _FrozenModel(BaseModel, frozen=True): +class _FrozenModel(_ParserBaseModel, frozen=True): def __lt__(self, other: "_FrozenModel") -> bool: for field in self.__fields__: self_v = getattr(self, field) @@ -146,29 +167,42 @@ class _ColumnRef(_FrozenModel): column: str -class ColumnRef(BaseModel): +class ColumnRef(_ParserBaseModel): table: Urn column: str -class _DownstreamColumnRef(BaseModel): +class _DownstreamColumnRef(_ParserBaseModel): table: Optional[_TableName] column: str + column_type: Optional[sqlglot.exp.DataType] -class DownstreamColumnRef(BaseModel): +class DownstreamColumnRef(_ParserBaseModel): table: Optional[Urn] column: str + column_type: Optional[SchemaFieldDataTypeClass] + native_column_type: Optional[str] + + @pydantic.validator("column_type", pre=True) + def _load_column_type( + cls, v: Optional[Union[dict, SchemaFieldDataTypeClass]] + ) -> Optional[SchemaFieldDataTypeClass]: + if v is None: + return None + if isinstance(v, SchemaFieldDataTypeClass): + return v + return 
SchemaFieldDataTypeClass.from_obj(v) -class _ColumnLineageInfo(BaseModel): +class _ColumnLineageInfo(_ParserBaseModel): downstream: _DownstreamColumnRef upstreams: List[_ColumnRef] logic: Optional[str] -class ColumnLineageInfo(BaseModel): +class ColumnLineageInfo(_ParserBaseModel): downstream: DownstreamColumnRef upstreams: List[ColumnRef] @@ -176,7 +210,7 @@ class ColumnLineageInfo(BaseModel): logic: Optional[str] = pydantic.Field(default=None, exclude=True) -class SqlParsingDebugInfo(BaseModel, arbitrary_types_allowed=True): +class SqlParsingDebugInfo(_ParserBaseModel): confidence: float = 0.0 tables_discovered: int = 0 @@ -190,7 +224,7 @@ def error(self) -> Optional[Exception]: return self.table_error or self.column_error -class SqlParsingResult(BaseModel): +class SqlParsingResult(_ParserBaseModel): query_type: QueryType = QueryType.UNKNOWN in_tables: List[Urn] @@ -541,6 +575,15 @@ def _schema_aware_fuzzy_column_resolve( ) from e logger.debug("Qualified sql %s", statement.sql(pretty=True, dialect=dialect)) + # Try to figure out the types of the output columns. + try: + statement = sqlglot.optimizer.annotate_types.annotate_types( + statement, schema=sqlglot_db_schema + ) + except sqlglot.errors.OptimizeError as e: + # This is not a fatal error, so we can continue. + logger.debug("sqlglot failed to annotate types: %s", e) + column_lineage = [] try: @@ -553,7 +596,6 @@ def _schema_aware_fuzzy_column_resolve( logger.debug("output columns: %s", [col[0] for col in output_columns]) output_col: str for output_col, original_col_expression in output_columns: - # print(f"output column: {output_col}") if output_col == "*": # If schema information is available, the * will be expanded to the actual columns. # Otherwise, we can't process it. @@ -613,12 +655,19 @@ def _schema_aware_fuzzy_column_resolve( output_col = _schema_aware_fuzzy_column_resolve(output_table, output_col) + # Guess the output column type. 
+ output_col_type = None + if original_col_expression.type: + output_col_type = original_col_expression.type + if not direct_col_upstreams: logger.debug(f' "{output_col}" has no upstreams') column_lineage.append( _ColumnLineageInfo( downstream=_DownstreamColumnRef( - table=output_table, column=output_col + table=output_table, + column=output_col, + column_type=output_col_type, ), upstreams=sorted(direct_col_upstreams), # logic=column_logic.sql(pretty=True, dialect=dialect), @@ -673,6 +722,42 @@ def _try_extract_select( return statement +def _translate_sqlglot_type( + sqlglot_type: sqlglot.exp.DataType.Type, +) -> Optional[SchemaFieldDataTypeClass]: + TypeClass: Any + if sqlglot_type in sqlglot.exp.DataType.TEXT_TYPES: + TypeClass = StringTypeClass + elif sqlglot_type in sqlglot.exp.DataType.NUMERIC_TYPES or sqlglot_type in { + sqlglot.exp.DataType.Type.DECIMAL, + }: + TypeClass = NumberTypeClass + elif sqlglot_type in { + sqlglot.exp.DataType.Type.BOOLEAN, + sqlglot.exp.DataType.Type.BIT, + }: + TypeClass = BooleanTypeClass + elif sqlglot_type in { + sqlglot.exp.DataType.Type.DATE, + }: + TypeClass = DateTypeClass + elif sqlglot_type in sqlglot.exp.DataType.TEMPORAL_TYPES: + TypeClass = TimeTypeClass + elif sqlglot_type in { + sqlglot.exp.DataType.Type.ARRAY, + }: + TypeClass = ArrayTypeClass + elif sqlglot_type in { + sqlglot.exp.DataType.Type.UNKNOWN, + }: + return None + else: + logger.debug("Unknown sqlglot type: %s", sqlglot_type) + return None + + return SchemaFieldDataTypeClass(type=TypeClass()) + + def _translate_internal_column_lineage( table_name_urn_mapping: Dict[_TableName, str], raw_column_lineage: _ColumnLineageInfo, @@ -684,6 +769,16 @@ def _translate_internal_column_lineage( downstream=DownstreamColumnRef( table=downstream_urn, column=raw_column_lineage.downstream.column, + column_type=_translate_sqlglot_type( + raw_column_lineage.downstream.column_type.this + ) + if raw_column_lineage.downstream.column_type + else None, + native_column_type=raw_column_lineage.downstream.column_type.sql() + if raw_column_lineage.downstream.column_type + and raw_column_lineage.downstream.column_type.this + != sqlglot.exp.DataType.Type.UNKNOWN + else None, ), upstreams=[ ColumnRef( diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index e3cc6c8101650..b6cb578217a2c 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -17,7 +17,6 @@ ) from datahub.ingestion.source.powerbi.m_query import parser, resolver, tree_function from datahub.ingestion.source.powerbi.m_query.resolver import DataPlatformTable, Lineage -from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, DownstreamColumnRef pytestmark = pytest.mark.integration_batch_2 @@ -742,75 +741,25 @@ def test_sqlglot_parser(): == "urn:li:dataset:(urn:li:dataPlatform:snowflake,sales_deployment.operations_analytics.transformed_prod.v_sme_unit_targets,PROD)" ) - assert lineage[0].column_lineage == [ - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="client_director"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="tier"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column='upper("manager")'), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="team_type"), - 
upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="date_target"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="monthid"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="target_team"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="seller_email"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="agent_key"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="sme_quota"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="revenue_quota"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="service_quota"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="bl_target"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="software_quota"), - upstreams=[], - logic=None, - ), + # TODO: None of these columns have upstreams? + # That doesn't seem right - we probably need to add fake schemas for the two tables above. + cols = [ + "client_director", + "tier", + 'upper("manager")', + "team_type", + "date_target", + "monthid", + "target_team", + "seller_email", + "agent_key", + "sme_quota", + "revenue_quota", + "service_quota", + "bl_target", + "software_quota", ] + for i, column in enumerate(cols): + assert lineage[0].column_lineage[i].downstream.table is None + assert lineage[0].column_lineage[i].downstream.column == column + assert lineage[0].column_lineage[i].upstreams == [] diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json index e50d944ce72e3..f0175b4dc8892 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json @@ -12,7 +12,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)", - "column": "col5" + "column": "col5", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -24,7 +30,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)", - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -36,7 +48,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)", - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -48,7 +66,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)", - "column": "col3" + "column": "col3", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git 
a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json index 78591286feb50..b7df5444987f2 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json index 0e93d31fbb6a6..67e306bebf545 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json index 78591286feb50..b7df5444987f2 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json index 17a801a63e3ff..b393b2445d6c4 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json @@ -10,7 +10,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project.my-dataset.test_table,PROD)", - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -22,7 +28,13 @@ { "downstream": { "table": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project.my-dataset.test_table,PROD)", - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -34,7 +46,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project.my-dataset.test_table,PROD)", - "column": "something" + "column": "something", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json index fd8a586ac74ac..53fb94300e804 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json @@ -11,7 +11,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my_view,PROD)", - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -27,7 +33,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my_view,PROD)", - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json index 1ca56840531e4..ff452467aa5bd 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json @@ -10,7 +10,9 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:oracle,vsal,PROD)", - "column": "Department" + "column": "Department", + "column_type": null, + "native_column_type": null }, "upstreams": [ { @@ -22,14 +24,22 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:oracle,vsal,PROD)", - "column": "Employees" + "column": "Employees", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [] }, { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:oracle,vsal,PROD)", - "column": "Salary" + "column": "Salary", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json index e241bdd08e243..eecb2265eaec5 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "total_agg" + "column": "total_agg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "orderkey" + "column": "orderkey", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + 
"native_column_type": "DECIMAL" }, "upstreams": [ { @@ -32,7 +44,13 @@ { "downstream": { "table": null, - "column": "custkey" + "column": "custkey", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -44,7 +62,13 @@ { "downstream": { "table": null, - "column": "orderstatus" + "column": "orderstatus", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -56,7 +80,13 @@ { "downstream": { "table": null, - "column": "totalprice" + "column": "totalprice", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { @@ -68,7 +98,13 @@ { "downstream": { "table": null, - "column": "orderdate" + "column": "orderdate", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "native_column_type": "DATE" }, "upstreams": [ { @@ -80,7 +116,13 @@ { "downstream": { "table": null, - "column": "orderpriority" + "column": "orderpriority", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -92,7 +134,13 @@ { "downstream": { "table": null, - "column": "clerk" + "column": "clerk", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -104,7 +152,13 @@ { "downstream": { "table": null, - "column": "shippriority" + "column": "shippriority", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -116,7 +170,13 @@ { "downstream": { "table": null, - "column": "comment" + "column": "comment", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json index d7264fd2db6b2..326db47e7ab33 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json @@ -18,21 +18,27 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "i_item_desc" + "column": "i_item_desc", + "column_type": null, + "native_column_type": null }, "upstreams": [] }, { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "w_warehouse_name" + "column": "w_warehouse_name", + "column_type": null, + "native_column_type": null }, "upstreams": [] }, { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "d_week_seq" + "column": "d_week_seq", + "column_type": null, + "native_column_type": null }, "upstreams": [ { @@ -44,7 +50,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "no_promo" + "column": "no_promo", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [ { @@ -56,7 +68,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "promo" + "column": "promo", + "column_type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [ { @@ -68,7 +86,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "total_cnt" + "column": "total_cnt", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [] } diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json index 10f5ee20b0c1f..b5fd5eebeb1b1 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json @@ -9,21 +9,27 @@ { "downstream": { "table": null, - "column": "a" + "column": "a", + "column_type": null, + "native_column_type": null }, "upstreams": [] }, { "downstream": { "table": null, - "column": "b" + "column": "b", + "column_type": null, + "native_column_type": null }, "upstreams": [] }, { "downstream": { "table": null, - "column": "c" + "column": "c", + "column_type": null, + "native_column_type": null }, "upstreams": [] } diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json index 9f6eeae46c294..a67c944822138 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "COUNT(`fact_complaint_snapshot`.`etl_data_dt_id`)" + "column": "COUNT(`fact_complaint_snapshot`.`etl_data_dt_id`)", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json index 109de96180422..5ad847e252497 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "post_id" + "column": "post_id", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -20,7 +26,9 @@ { "downstream": { "table": null, - "column": "id" + "column": "id", + "column_type": null, + "native_column_type": null }, "upstreams": [ { @@ -32,7 +40,9 @@ { "downstream": { "table": null, - "column": "min_metric" + "column": "min_metric", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json index 2340b2e95b0d0..902aa010c8afc 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json @@ -9,14 +9,26 @@ { "downstream": { "table": null, - "column": "label" + "column": "label", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + 
"native_column_type": "VARCHAR" }, "upstreams": [] }, { "downstream": { "table": null, - "column": "total_agg" + "column": "total_agg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json index 326c07d332c26..6ea88f45847ce 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json @@ -8,7 +8,9 @@ { "downstream": { "table": null, - "column": "max_col" + "column": "max_col", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json index 3e02314d6e8c3..67e9fd2d21a0e 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json @@ -9,7 +9,9 @@ { "downstream": { "table": null, - "column": "COL1" + "column": "COL1", + "column_type": null, + "native_column_type": null }, "upstreams": [ { @@ -21,7 +23,9 @@ { "downstream": { "table": null, - "column": "COL3" + "column": "COL3", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json index c12ad23b2f03b..6ee3d2e61c39b 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "post_id" + "column": "post_id", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -20,7 +26,9 @@ { "downstream": { "table": null, - "column": "id" + "column": "id", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json index 64cd80e9a2d69..a876824127ec1 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "total_price_category" + "column": "total_price_category", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "total_price_success" + "column": "total_price_success", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json new file mode 100644 index 0000000000000..7545e2b3269dc --- /dev/null +++ 
b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json @@ -0,0 +1,63 @@ +{ + "query_type": "SELECT", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)" + ], + "out_tables": [], + "column_lineage": [ + { + "downstream": { + "table": null, + "column": "orderkey", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL(20, 0)" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "o_orderkey" + } + ] + }, + { + "downstream": { + "table": null, + "column": "total_cast_int", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INT" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "o_totalprice" + } + ] + }, + { + "downstream": { + "table": null, + "column": "total_cast_float", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL(16, 4)" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "o_totalprice" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json index 7b22a46757e39..84e6b053000f1 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "total_agg" + "column": "total_agg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "total_avg" + "column": "total_avg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -32,7 +44,13 @@ { "downstream": { "table": null, - "column": "total_min" + "column": "total_min", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { @@ -44,7 +62,13 @@ { "downstream": { "table": null, - "column": "total_max" + "column": "total_max", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json index c912d99a3a8a3..39c94cf83c561 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json @@ -10,7 +10,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)", - "column": "Total_Agg" + "column": "Total_Agg", + "column_type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -22,7 +28,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)", - "column": "total_avg" + "column": "total_avg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -34,7 +46,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)", - "column": "TOTAL_MIN" + "column": "TOTAL_MIN", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { @@ -46,7 +64,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)", - "column": "total_max" + "column": "total_max", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json index 2af308ec60623..dbf5b1b9a4453 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json @@ -11,7 +11,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "user_fk" + "column": "user_fk", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL(38, 0)" }, "upstreams": [ { @@ -23,7 +29,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "email" + "column": "email", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR(16777216)" }, "upstreams": [ { @@ -35,7 +47,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "last_purchase_date" + "column": "last_purchase_date", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "native_column_type": "DATE" }, "upstreams": [ { @@ -47,7 +65,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "lifetime_purchase_amount" + "column": "lifetime_purchase_amount", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -59,7 +83,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "lifetime_purchase_count" + "column": "lifetime_purchase_count", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [ { @@ -71,7 +101,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": 
"average_purchase_amount" + "column": "average_purchase_amount", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index 2a965a9bb1e61..bb6e5f1581754 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -608,4 +608,25 @@ def test_snowflake_default_normalization(): ) +def test_snowflake_column_cast(): + assert_sql_result( + """ +SELECT + o.o_orderkey::NUMBER(20,0) as orderkey, + CAST(o.o_totalprice AS INT) as total_cast_int, + CAST(o.o_totalprice AS NUMBER(16,4)) as total_cast_float +FROM snowflake_sample_data.tpch_sf1.orders o +LIMIT 10 +""", + dialect="snowflake", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)": { + "orderkey": "NUMBER(38,0)", + "totalprice": "NUMBER(12,2)", + }, + }, + expected_file=RESOURCE_DIR / "test_snowflake_column_cast.json", + ) + + # TODO: Add a test for setting platform_instance or env From dd418de76d96fb41c9064261cdba37bc2af85309 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Thu, 12 Oct 2023 13:10:59 +0200 Subject: [PATCH 37/98] fix(ingest/bigquery): Fix shard regexp to match without underscore as well (#8934) --- .../ingestion/source/bigquery_v2/bigquery.py | 1 + .../source/bigquery_v2/bigquery_audit.py | 27 ++++++++++++++----- .../ingestion/source/bigquery_v2/queries.py | 8 +++--- .../ingestion/source_config/bigquery.py | 8 +++++- .../tests/unit/test_bigquery_source.py | 10 ++++--- .../unit/test_bigqueryv2_usage_source.py | 4 +-- 6 files changed, 41 insertions(+), 17 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index b4a04d96b532b..e577c2bac8bbd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -1057,6 +1057,7 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]: ): field.description = col.comment schema_fields[idx] = field + break else: tags = [] if col.is_partition_column: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py index b0ac77201b415..88060a9cdc91d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py @@ -20,7 +20,13 @@ logger: logging.Logger = logging.getLogger(__name__) -_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX = "((.+)[_$])?(\\d{8})$" +# Regexp for sharded tables. +# A sharded table is a table that has a suffix of the form _yyyymmdd or yyyymmdd, where yyyymmdd is a date. +# The regexp checks for valid dates in the suffix (e.g. 20200101, 20200229, 20201231) and if the date is not valid +# then it is not a sharded table. 
+_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX = ( + "((.+\\D)[_$]?)?(\\d\\d\\d\\d(?:0[1-9]|1[0-2])(?:0[1-9]|[12][0-9]|3[01]))$" +) @dataclass(frozen=True, order=True) @@ -40,7 +46,7 @@ class BigqueryTableIdentifier: _BQ_SHARDED_TABLE_SUFFIX: str = "_yyyymmdd" @staticmethod - def get_table_and_shard(table_name: str) -> Tuple[str, Optional[str]]: + def get_table_and_shard(table_name: str) -> Tuple[Optional[str], Optional[str]]: """ Args: table_name: @@ -53,16 +59,25 @@ def get_table_and_shard(table_name: str) -> Tuple[str, Optional[str]]: In case of non-sharded tables, returns (, None) In case of sharded tables, returns (, shard) """ + new_table_name = table_name match = re.match( BigqueryTableIdentifier._BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX, table_name, re.IGNORECASE, ) if match: - table_name = match.group(2) - shard = match.group(3) - return table_name, shard - return table_name, None + shard: str = match[3] + if shard: + if table_name.endswith(shard): + new_table_name = table_name[: -len(shard)] + + new_table_name = ( + new_table_name.rstrip("_") if new_table_name else new_table_name + ) + if new_table_name.endswith("."): + new_table_name = table_name + return (new_table_name, shard) if new_table_name else (None, shard) + return new_table_name, None @classmethod def from_string_name(cls, table: str) -> "BigqueryTableIdentifier": diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py index a87cb8c1cbfa5..67fcc33cdf218 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py @@ -51,8 +51,8 @@ class BigqueryQuery: p.max_partition_id, p.active_billable_bytes, p.long_term_billable_bytes, - REGEXP_EXTRACT(t.table_name, r".*_(\\d+)$") as table_suffix, - REGEXP_REPLACE(t.table_name, r"_(\\d+)$", "") as table_base + REGEXP_EXTRACT(t.table_name, r"(?:(?:.+\\D)[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$") as table_suffix, + REGEXP_REPLACE(t.table_name, r"(?:[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$", "") as table_base FROM `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLES t @@ -92,8 +92,8 @@ class BigqueryQuery: tos.OPTION_VALUE as comment, t.is_insertable_into, t.ddl, - REGEXP_EXTRACT(t.table_name, r".*_(\\d+)$") as table_suffix, - REGEXP_REPLACE(t.table_name, r"_(\\d+)$", "") as table_base + REGEXP_EXTRACT(t.table_name, r"(?:(?:.+\\D)[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$") as table_suffix, + REGEXP_REPLACE(t.table_name, r"(?:[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$", "") as table_base FROM `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLES t diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py index 8ca1296d819c1..0a73bb5203e72 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py @@ -4,7 +4,13 @@ from datahub.configuration.common import ConfigModel, ConfigurationError -_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: str = "((.+)[_$])?(\\d{8})$" +# Regexp for sharded tables. +# A sharded table is a table that has a suffix of the form _yyyymmdd or yyyymmdd, where yyyymmdd is a date. +# The regexp checks for valid dates in the suffix (e.g. 
20200101, 20200229, 20201231) and if the date is not valid +# then it is not a sharded table. +_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: str = ( + "((.+\\D)[_$]?)?(\\d\\d\\d\\d(?:0[1-9]|1[0-2])(?:0[1-9]|[12][0-9]|3[01]))$" +) class BigQueryBaseConfig(ConfigModel): diff --git a/metadata-ingestion/tests/unit/test_bigquery_source.py b/metadata-ingestion/tests/unit/test_bigquery_source.py index e9e91361f49f4..5a11a933c8595 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_source.py +++ b/metadata-ingestion/tests/unit/test_bigquery_source.py @@ -765,11 +765,14 @@ def test_gen_view_dataset_workunits( ("project.dataset.table_20231215", "project.dataset.table", "20231215"), ("project.dataset.table_2023", "project.dataset.table_2023", None), # incorrectly handled special case where dataset itself is a sharded table if full name is specified - ("project.dataset.20231215", "project.dataset.20231215", None), + ("project.dataset.20231215", "project.dataset.20231215", "20231215"), + ("project1.dataset2.20231215", "project1.dataset2.20231215", "20231215"), # Cases with Just the table name as input ("table", "table", None), - ("table20231215", "table20231215", None), + ("table20231215", "table", "20231215"), ("table_20231215", "table", "20231215"), + ("table2_20231215", "table2", "20231215"), + ("table220231215", "table220231215", None), ("table_1624046611000_name", "table_1624046611000_name", None), ("table_1624046611000", "table_1624046611000", None), # Special case where dataset itself is a sharded table @@ -801,7 +804,6 @@ def test_get_table_and_shard_default( ("project.dataset.2023", "project.dataset.2023", None), # Cases with Just the table name as input ("table", "table", None), - ("table20231215", "table20231215", None), ("table_20231215", "table", "20231215"), ("table_2023", "table", "2023"), ("table_1624046611000_name", "table_1624046611000_name", None), @@ -842,7 +844,7 @@ def test_get_table_and_shard_custom_shard_pattern( "project.dataset.table_1624046611000_name", ), ("project.dataset.table_1624046611000", "project.dataset.table_1624046611000"), - ("project.dataset.table20231215", "project.dataset.table20231215"), + ("project.dataset.table20231215", "project.dataset.table"), ("project.dataset.table_*", "project.dataset.table"), ("project.dataset.table_2023*", "project.dataset.table"), ("project.dataset.table_202301*", "project.dataset.table"), diff --git a/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py b/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py index 4cf42da4395f9..44fd840f28d59 100644 --- a/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py +++ b/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py @@ -144,10 +144,10 @@ def test_bigquery_table_sanitasitation(): assert new_table_ref.dataset == "dataset-4567" table_ref = BigQueryTableRef( - BigqueryTableIdentifier("project-1234", "dataset-4567", "foo_20222110") + BigqueryTableIdentifier("project-1234", "dataset-4567", "foo_20221210") ) new_table_identifier = table_ref.table_identifier - assert new_table_identifier.table == "foo_20222110" + assert new_table_identifier.table == "foo_20221210" assert new_table_identifier.is_sharded_table() assert new_table_identifier.get_table_display_name() == "foo" assert new_table_identifier.project_id == "project-1234" From c381806110ae995dd2164305394ee4e1d131e033 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Thu, 12 Oct 2023 13:56:30 +0200 Subject: [PATCH 38/98] feat(ingestion): Adding config option to auto lowercase dataset urns (#8928) --- 
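Note (editor's illustration, not part of the patch): the sketch below exercises the `auto_lowercase_urns` helper introduced in this commit and mirrors the expectations of the new unit test; the urn value is an arbitrary example.

```python
# Minimal sketch, assuming the auto_lowercase_urns helper added in this patch.
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.source_helpers import auto_lowercase_urns, auto_workunit
from datahub.metadata.schema_classes import StatusClass

mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,MyProject.MySchema.MyTable,PROD)",
    aspect=StatusClass(removed=False),
)

for wu in auto_lowercase_urns(auto_workunit([mcp])):
    # The dataset name component is lowercased, matching the new unit test's expectations.
    print(wu.get_urn())
    # urn:li:dataset:(urn:li:dataPlatform:bigquery,myproject.myschema.mytable,PROD)
```

Sources opt in by setting `convert_urns_to_lowercase: true` in their config, via the `LowerCaseDatasetUrnConfigMixin` field added below.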
.../datahub/configuration/source_common.py | 7 ++ .../src/datahub/ingestion/api/source.py | 24 +++++++ .../datahub/ingestion/api/source_helpers.py | 20 +++++- .../ingestion/source/bigquery_v2/bigquery.py | 3 - .../source/bigquery_v2/bigquery_config.py | 5 -- .../src/datahub/ingestion/source/kafka.py | 11 ++- .../ingestion/source/sql/sql_config.py | 11 ++- .../datahub/ingestion/source/unity/config.py | 6 +- .../src/datahub/utilities/urns/urn_iter.py | 33 +++++++-- .../api/source_helpers/test_source_helpers.py | 70 +++++++++++++++++++ 10 files changed, 170 insertions(+), 20 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index a9f891ddb7b1e..80b6ceb576c1c 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -54,6 +54,13 @@ class DatasetSourceConfigMixin(PlatformInstanceConfigMixin, EnvConfigMixin): """ +class LowerCaseDatasetUrnConfigMixin(ConfigModel): + convert_urns_to_lowercase: bool = Field( + default=False, + description="Whether to convert dataset urns to lowercase.", + ) + + class DatasetLineageProviderConfigBase(EnvConfigMixin): """ Any non-Dataset source that produces lineage to Datasets should inherit this class. diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 0bcc220cad49b..b86844b1c4c83 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -29,6 +29,7 @@ from datahub.ingestion.api.report import Report from datahub.ingestion.api.source_helpers import ( auto_browse_path_v2, + auto_lowercase_urns, auto_materialize_referenced_tags, auto_status_aspect, auto_workunit_reporter, @@ -192,7 +193,30 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: self.ctx.pipeline_config.flags.generate_browse_path_v2_dry_run ) + auto_lowercase_dataset_urns: Optional[MetadataWorkUnitProcessor] = None + if ( + self.ctx.pipeline_config + and self.ctx.pipeline_config.source + and self.ctx.pipeline_config.source.config + and ( + ( + hasattr( + self.ctx.pipeline_config.source.config, + "convert_urns_to_lowercase", + ) + and self.ctx.pipeline_config.source.config.convert_urns_to_lowercase + ) + or ( + hasattr(self.ctx.pipeline_config.source.config, "get") + and self.ctx.pipeline_config.source.config.get( + "convert_urns_to_lowercase" + ) + ) + ) + ): + auto_lowercase_dataset_urns = auto_lowercase_urns return [ + auto_lowercase_dataset_urns, auto_status_aspect, auto_materialize_referenced_tags, browse_path_processor, diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index 7fc15cf829678..2ce9e07bc57bc 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -35,7 +35,7 @@ from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.tag_urn import TagUrn from datahub.utilities.urns.urn import guess_entity_type -from datahub.utilities.urns.urn_iter import list_urns +from datahub.utilities.urns.urn_iter import list_urns, lowercase_dataset_urns if TYPE_CHECKING: from datahub.ingestion.api.source import SourceReport @@ -70,7 +70,6 @@ def auto_status_aspect( for wu in stream: urn = wu.get_urn() all_urns.add(urn) - if not 
wu.is_primary_source: # If this is a non-primary source, we pretend like we've seen the status # aspect so that we don't try to emit a removal for it. @@ -173,6 +172,23 @@ def auto_materialize_referenced_tags( ).as_workunit() +def auto_lowercase_urns( + stream: Iterable[MetadataWorkUnit], +) -> Iterable[MetadataWorkUnit]: + """Lowercase all dataset urns""" + + for wu in stream: + try: + old_urn = wu.get_urn() + lowercase_dataset_urns(wu.metadata) + wu.id = wu.id.replace(old_urn, wu.get_urn()) + + yield wu + except Exception as e: + logger.warning(f"Failed to lowercase urns for {wu}: {e}", exc_info=True) + yield wu + + def auto_browse_path_v2( stream: Iterable[MetadataWorkUnit], *, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index e577c2bac8bbd..552612f877b9a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -16,7 +16,6 @@ make_dataplatform_instance_urn, make_dataset_urn, make_tag_urn, - set_dataset_urn_to_lower, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import BigQueryDatasetKey, ContainerKey, ProjectIdKey @@ -218,8 +217,6 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): if self.config.enable_legacy_sharded_table_support: BigqueryTableIdentifier._BQ_SHARDED_TABLE_SUFFIX = "" - set_dataset_urn_to_lower(self.config.convert_urns_to_lowercase) - self.bigquery_data_dictionary = BigQuerySchemaApi( self.report.schema_api_perf, self.config.get_bigquery_client() ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 483355a85ac05..944814b6936a4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -206,11 +206,6 @@ def validate_column_lineage(cls, v: bool, values: Dict[str, Any]) -> bool: description="This flag enables the data lineage extraction from Data Lineage API exposed by Google Data Catalog. NOTE: This extractor can't build views lineage. It's recommended to enable the view's DDL parsing. 
Read the docs to have more information about: https://cloud.google.com/data-catalog/docs/concepts/about-data-lineage", ) - convert_urns_to_lowercase: bool = Field( - default=False, - description="Convert urns to lowercase.", - ) - enable_legacy_sharded_table_support: bool = Field( default=True, description="Use the legacy sharded table urn suffix added.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka.py index 566304e1999b7..d5039360da567 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka.py @@ -18,7 +18,10 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.kafka import KafkaConsumerConnectionConfig -from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.source_common import ( + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, +) from datahub.emitter import mce_builder from datahub.emitter.mce_builder import ( make_data_platform_urn, @@ -76,7 +79,11 @@ class KafkaTopicConfigKeys(str, Enum): UNCLEAN_LEADER_ELECTION_CONFIG = "unclean.leader.election.enable" -class KafkaSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): +class KafkaSourceConfig( + StatefulIngestionConfigBase, + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, +): connection: KafkaConsumerConnectionConfig = KafkaConsumerConnectionConfig() topic_patterns: AllowDenyPattern = AllowDenyPattern(allow=[".*"], deny=["^_.*"]) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py index 677d32c8bac08..08cc74aec3977 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py @@ -7,7 +7,10 @@ from pydantic import Field from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.source_common import ( + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, +) from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig from datahub.ingestion.source.state.stale_entity_removal_handler import ( @@ -21,7 +24,11 @@ logger: logging.Logger = logging.getLogger(__name__) -class SQLCommonConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): +class SQLCommonConfig( + StatefulIngestionConfigBase, + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, +): options: dict = pydantic.Field( default_factory=dict, description="Any options specified here will be passed to [SQLAlchemy.create_engine](https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine) as kwargs.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index 51390873712d3..a57ee39848855 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -7,7 +7,10 @@ from pydantic import Field from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.source_common import ( + DatasetSourceConfigMixin, + 
LowerCaseDatasetUrnConfigMixin, +) from datahub.configuration.validate_field_removal import pydantic_removed_field from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.ingestion.source.state.stale_entity_removal_handler import ( @@ -91,6 +94,7 @@ class UnityCatalogSourceConfig( BaseUsageConfig, DatasetSourceConfigMixin, StatefulProfilingConfigMixin, + LowerCaseDatasetUrnConfigMixin, ): token: str = pydantic.Field(description="Databricks personal access token") workspace_url: str = pydantic.Field( diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py index 261f95331af61..e13d439161064 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py +++ b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py @@ -3,7 +3,11 @@ from avro.schema import Field, RecordSchema from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.metadata.schema_classes import DictWrapper +from datahub.metadata.schema_classes import ( + DictWrapper, + MetadataChangeEventClass, + MetadataChangeProposalClass, +) from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.urn import Urn, guess_entity_type @@ -32,7 +36,7 @@ def list_urns_with_path( if isinstance(model, MetadataChangeProposalWrapper): if model.entityUrn: - urns.append((model.entityUrn, ["urn"])) + urns.append((model.entityUrn, ["entityUrn"])) if model.entityKeyAspect: urns.extend( _add_prefix_to_paths( @@ -83,7 +87,15 @@ def list_urns(model: Union[DictWrapper, MetadataChangeProposalWrapper]) -> List[ return [urn for urn, _ in list_urns_with_path(model)] -def transform_urns(model: DictWrapper, func: Callable[[str], str]) -> None: +def transform_urns( + model: Union[ + DictWrapper, + MetadataChangeEventClass, + MetadataChangeProposalClass, + MetadataChangeProposalWrapper, + ], + func: Callable[[str], str], +) -> None: """ Rewrites all URNs in the given object according to the given function. 
""" @@ -95,7 +107,9 @@ def transform_urns(model: DictWrapper, func: Callable[[str], str]) -> None: def _modify_at_path( - model: Union[DictWrapper, list], path: _Path, new_value: str + model: Union[DictWrapper, MetadataChangeProposalWrapper, list], + path: _Path, + new_value: str, ) -> None: assert len(path) > 0 @@ -103,6 +117,8 @@ def _modify_at_path( if isinstance(path[0], int): assert isinstance(model, list) model[path[0]] = new_value + elif isinstance(model, MetadataChangeProposalWrapper): + setattr(model, path[0], new_value) else: assert isinstance(model, DictWrapper) model._inner_dict[path[0]] = new_value @@ -120,7 +136,14 @@ def _lowercase_dataset_urn(dataset_urn: str) -> str: return str(cur_urn) -def lowercase_dataset_urns(model: DictWrapper) -> None: +def lowercase_dataset_urns( + model: Union[ + DictWrapper, + MetadataChangeEventClass, + MetadataChangeProposalClass, + MetadataChangeProposalWrapper, + ] +) -> None: def modify_urn(urn: str) -> str: if guess_entity_type(urn) == "dataset": return _lowercase_dataset_urn(urn) diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py index b6ec6ebce240c..b667af8bb41e9 100644 --- a/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py @@ -16,6 +16,7 @@ from datahub.ingestion.api.source_helpers import ( auto_browse_path_v2, auto_empty_dataset_usage_statistics, + auto_lowercase_urns, auto_status_aspect, auto_workunit, ) @@ -275,6 +276,75 @@ def test_auto_browse_path_v2_legacy_browse_path(telemetry_ping_mock): assert paths["platform,dataset-2,PROD)"] == _make_browse_path_entries(["something"]) +def test_auto_lowercase_aspects(): + mcws = auto_workunit( + [ + MetadataChangeProposalWrapper( + entityUrn=make_dataset_urn( + "bigquery", "myProject.mySchema.myTable", "PROD" + ), + aspect=models.DatasetKeyClass( + "urn:li:dataPlatform:bigquery", "myProject.mySchema.myTable", "PROD" + ), + ), + MetadataChangeProposalWrapper( + entityUrn="urn:li:container:008e111aa1d250dd52e0fd5d4b307b1a", + aspect=models.ContainerPropertiesClass( + name="test", + ), + ), + models.MetadataChangeEventClass( + proposedSnapshot=models.DatasetSnapshotClass( + urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-Public-Data.Covid19_Aha.staffing,PROD)", + aspects=[ + models.DatasetPropertiesClass( + customProperties={ + "key": "value", + }, + ), + ], + ), + ), + ] + ) + + expected = [ + *list( + auto_workunit( + [ + MetadataChangeProposalWrapper( + entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,myproject.myschema.mytable,PROD)", + aspect=models.DatasetKeyClass( + "urn:li:dataPlatform:bigquery", + "myProject.mySchema.myTable", + "PROD", + ), + ), + MetadataChangeProposalWrapper( + entityUrn="urn:li:container:008e111aa1d250dd52e0fd5d4b307b1a", + aspect=models.ContainerPropertiesClass( + name="test", + ), + ), + models.MetadataChangeEventClass( + proposedSnapshot=models.DatasetSnapshotClass( + urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-public-data.covid19_aha.staffing,PROD)", + aspects=[ + models.DatasetPropertiesClass( + customProperties={ + "key": "value", + }, + ), + ], + ), + ), + ] + ) + ), + ] + assert list(auto_lowercase_urns(mcws)) == expected + + @patch("datahub.ingestion.api.source_helpers.telemetry.telemetry_instance.ping") def test_auto_browse_path_v2_container_over_legacy_browse_path(telemetry_ping_mock): structure = {"a": {"b": ["c"]}} 
From 8813ae2fb15a1f80d5f0ef433fce1f84e1a240b5 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 12 Oct 2023 07:58:10 -0400 Subject: [PATCH 39/98] feat(ingest/s3): support .gzip and fix decompression bug (#8990) --- .../ingestion/source/data_lake_common/path_spec.py | 9 ++++++++- .../src/datahub/ingestion/source/s3/source.py | 8 +++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py index d1c949f48e2cd..a35fb94614f72 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py +++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py @@ -18,7 +18,14 @@ logger: logging.Logger = logging.getLogger(__name__) SUPPORTED_FILE_TYPES: List[str] = ["csv", "tsv", "json", "parquet", "avro"] -SUPPORTED_COMPRESSIONS: List[str] = ["gz", "bz2"] + +# These come from the smart_open library. +SUPPORTED_COMPRESSIONS: List[str] = [ + "gz", + "bz2", + # We have a monkeypatch on smart_open that aliases .gzip to .gz. + "gzip", +] class PathSpec(ConfigModel): diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index ac4433b7eb1f0..eb49fcbb268c0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -10,6 +10,7 @@ from pathlib import PurePath from typing import Any, Dict, Iterable, List, Optional, Tuple +import smart_open.compression as so_compression from more_itertools import peekable from pyspark.conf import SparkConf from pyspark.sql import SparkSession @@ -120,6 +121,9 @@ } PAGE_SIZE = 1000 +# Hack to support the .gzip extension with smart_open. +so_compression.register_compressor(".gzip", so_compression._COMPRESSOR_REGISTRY[".gz"]) + def get_column_type( report: SourceReport, dataset_name: str, column_type: str @@ -407,7 +411,9 @@ def get_fields(self, table_data: TableData, path_spec: PathSpec) -> List: table_data.full_path, "rb", transport_params={"client": s3_client} ) else: - file = open(table_data.full_path, "rb") + # We still use smart_open here to take advantage of the compression + # capabilities of smart_open. 
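Note (editor's illustration, not part of the patch): a minimal sketch of the smart_open alias this change registers; the file path is made up, and in the connector the same `register_compressor` call happens once at import time in `s3/source.py`.

```python
# Minimal sketch: alias ".gzip" to the existing ".gz" compressor so that
# smart_open transparently decompresses files with a .gzip extension.
import smart_open
import smart_open.compression as so_compression

so_compression.register_compressor(".gzip", so_compression._COMPRESSOR_REGISTRY[".gz"])

with smart_open.open("/tmp/example/part-0000.csv.gzip", "rb") as f:  # hypothetical path
    header = f.readline()  # first line of the decompressed stream, as bytes
```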
+ file = smart_open(table_data.full_path, "rb") fields = [] From f6e131206394e1f56e4f966689c8abd1e8641919 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Thu, 12 Oct 2023 18:43:14 +0100 Subject: [PATCH 40/98] feat(ingestion): Adds support for memory profiling (#8856) Co-authored-by: Harshal Sheth --- docs-website/sidebars.js | 1 + .../docs/dev_guides/profiling_ingestions.md | 55 +++++++ metadata-ingestion/setup.py | 5 + .../src/datahub/ingestion/run/pipeline.py | 148 ++++++++++-------- .../datahub/ingestion/run/pipeline_config.py | 7 + 5 files changed, 148 insertions(+), 68 deletions(-) create mode 100644 metadata-ingestion/docs/dev_guides/profiling_ingestions.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index bdf3926c17e0d..21b3a1d3fe4d3 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -140,6 +140,7 @@ module.exports = { "metadata-ingestion/docs/dev_guides/classification", "metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source", "metadata-ingestion/docs/dev_guides/sql_profiles", + "metadata-ingestion/docs/dev_guides/profiling_ingestions", ], }, ], diff --git a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md new file mode 100644 index 0000000000000..d876d99b494f8 --- /dev/null +++ b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md @@ -0,0 +1,55 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Profiling ingestions + + + +**🤝 Version compatibility** +> Open Source DataHub: **0.11.1** | Acryl: **0.2.12** + +This page documents how to perform memory profiles of ingestion runs. +It is useful when trying to size the amount of resources necessary to ingest some source or when developing new features or sources. + +## How to use +Install the `debug` plugin for DataHub's CLI wherever the ingestion runs: + +```bash +pip install 'acryl-datahub[debug]' +``` + +This will install [memray](https://github.com/bloomberg/memray) in your python environment. + +Add a flag to your ingestion recipe to generate a memray memory dump of your ingestion: +```yaml +source: + ... + +sink: + ... + +flags: + generate_memory_profiles: "" +``` + +Once the ingestion run starts a binary file will be created and appended to during the execution of the ingestion. + +These files follow the pattern `file-.bin` for a unique identification. +Once the ingestion has finished you can use `memray` to analyze the memory dump in a flamegraph view using: + +```$ memray flamegraph file-None-file-2023_09_18-21_38_43.bin``` + +This will generate an interactive HTML file for analysis: + +
="center">
+  <!-- screenshot of the interactive memray flamegraph report -->
+</p>
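+
+For reference, the pipeline enables the tracker roughly as sketched below (a simplified illustration of what this patch adds to `Pipeline.run`; `profile_dir` and `run_id` stand in for the `generate_memory_profiles` flag and the pipeline run id):
+
+```python
+import contextlib
+
+
+def run_with_optional_profiling(profile_dir, run_id):
+    with contextlib.ExitStack() as stack:
+        if profile_dir:
+            import memray  # installed via the `debug` extra
+
+            # The tracker is only entered when the flag is set, so normal runs
+            # pay no profiling overhead; the dump is written to
+            # <profile_dir>/<run_id>.bin and appended to while ingestion runs.
+            stack.enter_context(memray.Tracker(f"{profile_dir}/{run_id}.bin"))
+        ...  # work units are processed here, inside the (optional) tracker
+```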
+ + +`memray` has an extensive set of features for memory investigation. Take a look at their [documentation](https://bloomberg.github.io/memray/overview.html) to see the full feature set. + + +## Questions + +If you've got any questions on configuring profiling, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index fe8e3be4632c4..61e7b684682a4 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -431,6 +431,10 @@ deepdiff_dep = "deepdiff" test_api_requirements = {pytest_dep, deepdiff_dep, "PyYAML"} +debug_requirements = { + "memray" +} + base_dev_requirements = { *base_requirements, *framework_common, @@ -723,5 +727,6 @@ "dev": list(dev_requirements), "testing-utils": list(test_api_requirements), # To import `datahub.testing` "integration-tests": list(full_test_dev_requirements), + "debug": list(debug_requirements), }, ) diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index 79d959965e0dd..07b55e0e25a89 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -353,77 +353,89 @@ def _time_to_print(self) -> bool: return False def run(self) -> None: - self.final_status = "unknown" - self._notify_reporters_on_ingestion_start() - callback = None - try: - callback = ( - LoggingCallback() - if not self.config.failure_log.enabled - else DeadLetterQueueCallback( - self.ctx, self.config.failure_log.log_config - ) - ) - for wu in itertools.islice( - self.source.get_workunits(), - self.preview_workunits if self.preview_mode else None, - ): - try: - if self._time_to_print(): - self.pretty_print_summary(currently_running=True) - except Exception as e: - logger.warning(f"Failed to print summary {e}") - - if not self.dry_run: - self.sink.handle_work_unit_start(wu) - try: - record_envelopes = self.extractor.get_records(wu) - for record_envelope in self.transform(record_envelopes): - if not self.dry_run: - self.sink.write_record_async(record_envelope, callback) - - except RuntimeError: - raise - except SystemExit: - raise - except Exception as e: - logger.error( - "Failed to process some records. Continuing.", exc_info=e + with contextlib.ExitStack() as stack: + if self.config.flags.generate_memory_profiles: + import memray + + stack.enter_context( + memray.Tracker( + f"{self.config.flags.generate_memory_profiles}/{self.config.run_id}.bin" ) - # TODO: Transformer errors should cause the pipeline to fail. 
- - self.extractor.close() - if not self.dry_run: - self.sink.handle_work_unit_end(wu) - self.source.close() - # no more data is coming, we need to let the transformers produce any additional records if they are holding on to state - for record_envelope in self.transform( - [ - RecordEnvelope( - record=EndOfStream(), metadata={"workunit_id": "end-of-stream"} + ) + + self.final_status = "unknown" + self._notify_reporters_on_ingestion_start() + callback = None + try: + callback = ( + LoggingCallback() + if not self.config.failure_log.enabled + else DeadLetterQueueCallback( + self.ctx, self.config.failure_log.log_config ) - ] - ): - if not self.dry_run and not isinstance( - record_envelope.record, EndOfStream + ) + for wu in itertools.islice( + self.source.get_workunits(), + self.preview_workunits if self.preview_mode else None, + ): + try: + if self._time_to_print(): + self.pretty_print_summary(currently_running=True) + except Exception as e: + logger.warning(f"Failed to print summary {e}") + + if not self.dry_run: + self.sink.handle_work_unit_start(wu) + try: + record_envelopes = self.extractor.get_records(wu) + for record_envelope in self.transform(record_envelopes): + if not self.dry_run: + self.sink.write_record_async(record_envelope, callback) + + except RuntimeError: + raise + except SystemExit: + raise + except Exception as e: + logger.error( + "Failed to process some records. Continuing.", + exc_info=e, + ) + # TODO: Transformer errors should cause the pipeline to fail. + + self.extractor.close() + if not self.dry_run: + self.sink.handle_work_unit_end(wu) + self.source.close() + # no more data is coming, we need to let the transformers produce any additional records if they are holding on to state + for record_envelope in self.transform( + [ + RecordEnvelope( + record=EndOfStream(), + metadata={"workunit_id": "end-of-stream"}, + ) + ] ): - # TODO: propagate EndOfStream and other control events to sinks, to allow them to flush etc. - self.sink.write_record_async(record_envelope, callback) - - self.sink.close() - self.process_commits() - self.final_status = "completed" - except (SystemExit, RuntimeError, KeyboardInterrupt) as e: - self.final_status = "cancelled" - logger.error("Caught error", exc_info=e) - raise - finally: - clear_global_warnings() - - if callback and hasattr(callback, "close"): - callback.close() # type: ignore - - self._notify_reporters_on_ingestion_completion() + if not self.dry_run and not isinstance( + record_envelope.record, EndOfStream + ): + # TODO: propagate EndOfStream and other control events to sinks, to allow them to flush etc. 
+ self.sink.write_record_async(record_envelope, callback) + + self.sink.close() + self.process_commits() + self.final_status = "completed" + except (SystemExit, RuntimeError, KeyboardInterrupt) as e: + self.final_status = "cancelled" + logger.error("Caught error", exc_info=e) + raise + finally: + clear_global_warnings() + + if callback and hasattr(callback, "close"): + callback.close() # type: ignore + + self._notify_reporters_on_ingestion_completion() def transform(self, records: Iterable[RecordEnvelope]) -> Iterable[RecordEnvelope]: """ diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py index ff9a7a6f3d146..da3cee8ad9c1b 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py @@ -57,6 +57,13 @@ class FlagsConfig(ConfigModel): ), ) + generate_memory_profiles: Optional[str] = Field( + default=None, + description=( + "Generate memray memory dumps for ingestion process by providing a path to write the dump file in." + ), + ) + class PipelineConfig(ConfigModel): # Once support for discriminated unions gets merged into Pydantic, we can From c564abcbf049e5251f9cc25bf0e339956279649d Mon Sep 17 00:00:00 2001 From: Amanda Hernando <110099762+amanda-her@users.noreply.github.com> Date: Thu, 12 Oct 2023 20:38:42 +0200 Subject: [PATCH 41/98] feat(auth): add group membership field resolver provider (#8846) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrián Pertíñez Co-authored-by: Adrián Pertíñez --- .../authorization/AuthorizationUtils.java | 8 +- .../dataset/DatasetStatsSummaryResolver.java | 4 +- .../dataset/DatasetUsageStatsResolver.java | 4 +- .../load/TimeSeriesAspectResolver.java | 4 +- .../policy/GetGrantedPrivilegesResolver.java | 6 +- .../resolvers/glossary/GlossaryUtilsTest.java | 36 +-- .../query/CreateQueryResolverTest.java | 6 +- .../query/DeleteQueryResolverTest.java | 6 +- .../query/UpdateQueryResolverTest.java | 10 +- .../com/datahub/authorization/AuthUtil.java | 10 +- .../authorization/AuthorizationRequest.java | 2 +- .../authorization/AuthorizerContext.java | 4 +- .../authorization/EntityFieldType.java | 31 ++ .../com/datahub/authorization/EntitySpec.java | 23 ++ .../authorization/EntitySpecResolver.java | 11 + .../datahub/authorization/FieldResolver.java | 6 +- .../authorization/ResolvedEntitySpec.java | 66 ++++ .../authorization/ResolvedResourceSpec.java | 55 ---- .../authorization/ResourceFieldType.java | 27 -- .../datahub/authorization/ResourceSpec.java | 23 -- .../authorization/ResourceSpecResolver.java | 11 - .../auth/authorization/Authorizer.java | 4 +- .../authorization/AuthorizerChain.java | 2 +- .../authorization/DataHubAuthorizer.java | 42 ++- ...er.java => DefaultEntitySpecResolver.java} | 33 +- .../datahub/authorization/FilterUtils.java | 8 +- .../datahub/authorization/PolicyEngine.java | 206 +++++------- ...PlatformInstanceFieldResolverProvider.java | 28 +- .../DomainFieldResolverProvider.java | 20 +- .../EntityFieldResolverProvider.java | 22 ++ .../EntityTypeFieldResolverProvider.java | 16 +- .../EntityUrnFieldResolverProvider.java | 16 +- .../GroupMembershipFieldResolverProvider.java | 78 +++++ .../OwnerFieldResolverProvider.java | 20 +- .../ResourceFieldResolverProvider.java | 22 -- .../authorization/DataHubAuthorizerTest.java | 22 +- .../authorization/PolicyEngineTest.java | 304 ++++++++---------- 
...formInstanceFieldResolverProviderTest.java | 37 ++- ...upMembershipFieldResolverProviderTest.java | 212 ++++++++++++ .../factory/auth/AuthorizerChainFactory.java | 14 +- .../delegates/EntityApiDelegateImpl.java | 9 +- .../openapi/entities/EntitiesController.java | 10 +- .../RelationshipsController.java | 6 +- .../openapi/timeline/TimelineController.java | 4 +- .../openapi/util/MappingUtil.java | 11 +- .../datahub/plugins/test/TestAuthorizer.java | 4 +- .../resources/entity/AspectResource.java | 13 +- .../entity/BatchIngestionRunResource.java | 6 +- .../resources/entity/EntityResource.java | 54 ++-- .../resources/entity/EntityV2Resource.java | 8 +- .../entity/EntityVersionedV2Resource.java | 6 +- .../resources/lineage/Relationships.java | 8 +- .../metadata/resources/operations/Utils.java | 6 +- .../resources/platform/PlatformResource.java | 4 +- .../resources/restli/RestliUtils.java | 6 +- .../metadata/resources/usage/UsageStats.java | 8 +- 56 files changed, 937 insertions(+), 685 deletions(-) create mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java create mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpec.java create mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpecResolver.java create mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedEntitySpec.java delete mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java delete mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java delete mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpec.java delete mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpecResolver.java rename metadata-service/auth-impl/src/main/java/com/datahub/authorization/{DefaultResourceSpecResolver.java => DefaultEntitySpecResolver.java} (51%) create mode 100644 metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java create mode 100644 metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java delete mode 100644 metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/ResourceFieldResolverProvider.java create mode 100644 metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java index 3089b8c8fc2db..03e63c7fb472f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java @@ -4,7 +4,7 @@ import com.datahub.plugins.auth.authorization.Authorizer; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -90,7 +90,7 @@ public static boolean canManageTags(@Nonnull QueryContext context) 
{ } public static boolean canDeleteEntity(@Nonnull Urn entityUrn, @Nonnull QueryContext context) { - return isAuthorized(context, Optional.of(new ResourceSpec(entityUrn.getEntityType(), entityUrn.toString())), PoliciesConfig.DELETE_ENTITY_PRIVILEGE); + return isAuthorized(context, Optional.of(new EntitySpec(entityUrn.getEntityType(), entityUrn.toString())), PoliciesConfig.DELETE_ENTITY_PRIVILEGE); } public static boolean canManageUserCredentials(@Nonnull QueryContext context) { @@ -173,7 +173,7 @@ public static boolean canDeleteQuery(@Nonnull Urn entityUrn, @Nonnull List public static boolean isAuthorized( @Nonnull QueryContext context, - @Nonnull Optional resourceSpec, + @Nonnull Optional resourceSpec, @Nonnull PoliciesConfig.Privilege privilege) { final Authorizer authorizer = context.getAuthorizer(); final String actor = context.getActorUrn(); @@ -196,7 +196,7 @@ public static boolean isAuthorized( @Nonnull String resource, @Nonnull DisjunctivePrivilegeGroup privilegeGroup ) { - final ResourceSpec resourceSpec = new ResourceSpec(resourceType, resource); + final EntitySpec resourceSpec = new EntitySpec(resourceType, resource); return AuthUtil.isAuthorized(authorizer, actor, Optional.of(resourceSpec), privilegeGroup); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java index 23be49c7e7140..2873866bb34f7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.dataset; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.linkedin.common.urn.Urn; @@ -104,7 +104,7 @@ private CorpUser createPartialUser(final Urn userUrn) { private boolean isAuthorized(final Urn resourceUrn, final QueryContext context) { return AuthorizationUtils.isAuthorized(context, - Optional.of(new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString())), + Optional.of(new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString())), PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java index 20361830ad5a5..e4bec8e896fdf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.dataset; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; @@ -52,7 +52,7 @@ public CompletableFuture get(DataFetchingEnvironment environme private boolean isAuthorized(final Urn resourceUrn, final QueryContext context) { return AuthorizationUtils.isAuthorized(context, - Optional.of(new 
ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString())), + Optional.of(new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString())), PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java index 197ca8640559d..f13ebf8373e91 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.load; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.generated.Entity; @@ -79,7 +79,7 @@ public TimeSeriesAspectResolver( private boolean isAuthorized(QueryContext context, String urn) { if (_entityName.equals(Constants.DATASET_ENTITY_NAME) && _aspectName.equals( Constants.DATASET_PROFILE_ASPECT_NAME)) { - return AuthorizationUtils.isAuthorized(context, Optional.of(new ResourceSpec(_entityName, urn)), + return AuthorizationUtils.isAuthorized(context, Optional.of(new EntitySpec(_entityName, urn)), PoliciesConfig.VIEW_DATASET_PROFILE_PRIVILEGE); } return true; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java index 2f20fdaf1e9b1..11f7793db82c8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java @@ -2,7 +2,7 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.authorization.DataHubAuthorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.GetGrantedPrivilegesInput; @@ -33,8 +33,8 @@ public CompletableFuture get(final DataFetchingEnvironment environme if (!isAuthorized(context, actor)) { throw new AuthorizationException("Unauthorized to get privileges for the given author."); } - final Optional resourceSpec = Optional.ofNullable(input.getResourceSpec()) - .map(spec -> new ResourceSpec(EntityTypeMapper.getName(spec.getResourceType()), spec.getResourceUrn())); + final Optional resourceSpec = Optional.ofNullable(input.getResourceSpec()) + .map(spec -> new EntitySpec(EntityTypeMapper.getName(spec.getResourceType()), spec.getResourceUrn())); if (context.getAuthorizer() instanceof AuthorizerChain) { DataHubAuthorizer dataHubAuthorizer = ((AuthorizerChain) context.getAuthorizer()).getDefaultAuthorizer(); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java index ccaab44f60dd4..8bfc32e1999ae 100644 --- 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java @@ -5,7 +5,7 @@ import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.urn.GlossaryNodeUrn; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -89,17 +89,17 @@ private void setUpTests() throws Exception { Mockito.any(Authentication.class) )).thenReturn(new EntityResponse().setAspects(new EnvelopedAspectMap(parentNode3Aspects))); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec3); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec2); - final ResourceSpec resourceSpec1 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); + final EntitySpec resourceSpec1 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec1); } - private void mockAuthRequest(String privilege, AuthorizationResult.Type allowOrDeny, ResourceSpec resourceSpec) { + private void mockAuthRequest(String privilege, AuthorizationResult.Type allowOrDeny, EntitySpec resourceSpec) { final AuthorizationRequest authorizationRequest = new AuthorizationRequest( userUrn, privilege, @@ -150,7 +150,7 @@ public void testCanManageChildrenEntitiesAuthorized() throws Exception { // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn.toString()); + final EntitySpec resourceSpec = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.ALLOW, resourceSpec); assertTrue(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn, mockClient)); @@ -162,7 +162,7 @@ public void testCanManageChildrenEntitiesUnauthorized() throws Exception { // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn.toString()); + final EntitySpec resourceSpec = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec); @@ -175,13 +175,13 @@ public void testCanManageChildrenRecursivelyEntitiesAuthorized() throws Exceptio // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", 
AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.ALLOW, resourceSpec3); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec2); - final ResourceSpec resourceSpec1 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); + final EntitySpec resourceSpec1 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec1); assertTrue(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn1, mockClient)); @@ -193,13 +193,13 @@ public void testCanManageChildrenRecursivelyEntitiesUnauthorized() throws Except // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec3); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec2); - final ResourceSpec resourceSpec1 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); + final EntitySpec resourceSpec1 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec1); assertFalse(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn1, mockClient)); @@ -211,10 +211,10 @@ public void testCanManageChildrenRecursivelyEntitiesAuthorizedLevel2() throws Ex // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.ALLOW, resourceSpec2); - final ResourceSpec resourceSpec1 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); + final EntitySpec resourceSpec1 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec1); assertTrue(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn1, mockClient)); @@ -226,10 +226,10 @@ public void testCanManageChildrenRecursivelyEntitiesUnauthorizedLevel2() throws // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", 
AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec3); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec2); assertFalse(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn2, mockClient)); @@ -241,7 +241,7 @@ public void testCanManageChildrenRecursivelyEntitiesNoLevel2() throws Exception // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec3); assertFalse(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn3, mockClient)); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java index 196eb24b52bf8..9c04c67dd3a3b 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java @@ -5,7 +5,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -201,7 +201,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); @@ -210,7 +210,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java index a6b4887b0e882..78c894f27cbc3 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java @@ -5,7 +5,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationRequest; import 
com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -134,7 +134,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { DeleteQueryResolverTest.TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( DeleteQueryResolverTest.TEST_DATASET_URN.getEntityType(), DeleteQueryResolverTest.TEST_DATASET_URN.toString())) ); @@ -143,7 +143,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java index 7a76b6d6be5a4..9b500b5fb3936 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java @@ -5,7 +5,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -206,7 +206,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); @@ -215,7 +215,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); @@ -224,7 +224,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN_2.getEntityType(), TEST_DATASET_URN_2.toString())) ); @@ -233,7 +233,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN_2.getEntityType(), TEST_DATASET_URN_2.toString())) ); diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java index dfb936c61ee0c..e159993a8a243 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java @@ -11,7 +11,7 @@ public class AuthUtil { public static boolean isAuthorized( @Nonnull Authorizer authorizer, @Nonnull String actor, - @Nonnull Optional 
maybeResourceSpec, + @Nonnull Optional maybeResourceSpec, @Nonnull DisjunctivePrivilegeGroup privilegeGroup ) { for (ConjunctivePrivilegeGroup andPrivilegeGroup : privilegeGroup.getAuthorizedPrivilegeGroups()) { @@ -27,7 +27,7 @@ public static boolean isAuthorized( public static boolean isAuthorizedForResources( @Nonnull Authorizer authorizer, @Nonnull String actor, - @Nonnull List> resourceSpecs, + @Nonnull List> resourceSpecs, @Nonnull DisjunctivePrivilegeGroup privilegeGroup ) { for (ConjunctivePrivilegeGroup andPrivilegeGroup : privilegeGroup.getAuthorizedPrivilegeGroups()) { @@ -44,7 +44,7 @@ private static boolean isAuthorized( @Nonnull Authorizer authorizer, @Nonnull String actor, @Nonnull ConjunctivePrivilegeGroup requiredPrivileges, - @Nonnull Optional resourceSpec) { + @Nonnull Optional resourceSpec) { // Each privilege in a group _must_ all be true to permit the operation. for (final String privilege : requiredPrivileges.getRequiredPrivileges()) { // Create and evaluate an Authorization request. @@ -62,11 +62,11 @@ private static boolean isAuthorizedForResources( @Nonnull Authorizer authorizer, @Nonnull String actor, @Nonnull ConjunctivePrivilegeGroup requiredPrivileges, - @Nonnull List> resourceSpecs) { + @Nonnull List> resourceSpecs) { // Each privilege in a group _must_ all be true to permit the operation. for (final String privilege : requiredPrivileges.getRequiredPrivileges()) { // Create and evaluate an Authorization request. - for (Optional resourceSpec : resourceSpecs) { + for (Optional resourceSpec : resourceSpecs) { final AuthorizationRequest request = new AuthorizationRequest(actor, privilege, resourceSpec); final AuthorizationResult result = authorizer.authorize(request); if (AuthorizationResult.Type.DENY.equals(result.getType())) { diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationRequest.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationRequest.java index 084a455495551..9e75de3cbf44d 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationRequest.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationRequest.java @@ -21,5 +21,5 @@ public class AuthorizationRequest { * The resource that the user is requesting for, if applicable. If the privilege is a platform privilege * this optional will be empty. */ - Optional resourceSpec; + Optional resourceSpec; } diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java index f9940d171d5d4..b79a4fa20c7ea 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java @@ -18,9 +18,9 @@ public class AuthorizerContext { private final Map contextMap; /** - * A utility for resolving a {@link ResourceSpec} to resolved resource field values. + * A utility for resolving an {@link EntitySpec} to resolved entity field values. 
*/ - private ResourceSpecResolver resourceSpecResolver; + private EntitySpecResolver entitySpecResolver; /** * diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java new file mode 100644 index 0000000000000..46763f29a7040 --- /dev/null +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java @@ -0,0 +1,31 @@ +package com.datahub.authorization; + +/** + * List of entity field types to fetch for a given entity + */ +public enum EntityFieldType { + /** + * Type of the entity (e.g. dataset, chart) + */ + TYPE, + /** + * Urn of the entity + */ + URN, + /** + * Owners of the entity + */ + OWNER, + /** + * Domains of the entity + */ + DOMAIN, + /** + * Groups of which the entity (only applies to corpUser) is a member + */ + GROUP_MEMBERSHIP, + /** + * Data platform instance of resource + */ + DATA_PLATFORM_INSTANCE +} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpec.java new file mode 100644 index 0000000000000..656bec0f44fc2 --- /dev/null +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpec.java @@ -0,0 +1,23 @@ +package com.datahub.authorization; + +import javax.annotation.Nonnull; +import lombok.Value; + + +/** + * Details about the entities involved in the authorization process. It models the actor and the resource being acted + * upon. Resource types currently supported can be found inside of {@link com.linkedin.metadata.authorization.PoliciesConfig} + */ +@Value +public class EntitySpec { + /** + * The entity type. (dataset, chart, dashboard, corpGroup, etc). + */ + @Nonnull + String type; + /** + * The entity identity. Most often, this corresponds to the raw entity urn. (urn:li:corpGroup:groupId) + */ + @Nonnull + String entity; +} \ No newline at end of file diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpecResolver.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpecResolver.java new file mode 100644 index 0000000000000..67347fbf87a87 --- /dev/null +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpecResolver.java @@ -0,0 +1,11 @@ +package com.datahub.authorization; + +/** + * An Entity Spec Resolver is responsible for resolving a {@link EntitySpec} to a {@link ResolvedEntitySpec}. + */ +public interface EntitySpecResolver { + /** + Resolve a {@link EntitySpec} to a resolved entity spec. 
+ **/ + ResolvedEntitySpec resolve(EntitySpec entitySpec); +} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/FieldResolver.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/FieldResolver.java index 9318f5f8e7b96..955a06fd54cb9 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/FieldResolver.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/FieldResolver.java @@ -33,9 +33,9 @@ public static FieldResolver getResolverFromValues(Set values) { /** * Helper function that returns FieldResolver given a fetchFieldValue function */ - public static FieldResolver getResolverFromFunction(ResourceSpec resourceSpec, - Function fetchFieldValue) { - return new FieldResolver(() -> CompletableFuture.supplyAsync(() -> fetchFieldValue.apply(resourceSpec))); + public static FieldResolver getResolverFromFunction(EntitySpec entitySpec, + Function fetchFieldValue) { + return new FieldResolver(() -> CompletableFuture.supplyAsync(() -> fetchFieldValue.apply(entitySpec))); } public static FieldValue emptyFieldValue() { diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedEntitySpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedEntitySpec.java new file mode 100644 index 0000000000000..7948766df5715 --- /dev/null +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedEntitySpec.java @@ -0,0 +1,66 @@ +package com.datahub.authorization; + +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nullable; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; + + +/** + * Wrapper around authorization request with field resolvers for lazily fetching the field values for each field type + */ +@RequiredArgsConstructor +@ToString +public class ResolvedEntitySpec { + @Getter + private final EntitySpec spec; + private final Map fieldResolvers; + + public Set getFieldValues(EntityFieldType entityFieldType) { + if (!fieldResolvers.containsKey(entityFieldType)) { + return Collections.emptySet(); + } + return fieldResolvers.get(entityFieldType).getFieldValuesFuture().join().getValues(); + } + + /** + * Fetch the owners for an entity. + * @return a set of owner urns, or empty set if none exist. + */ + public Set getOwners() { + if (!fieldResolvers.containsKey(EntityFieldType.OWNER)) { + return Collections.emptySet(); + } + return fieldResolvers.get(EntityFieldType.OWNER).getFieldValuesFuture().join().getValues(); + } + + /** + * Fetch the platform instance for a Resolved Resource Spec + * @return a Platform Instance or null if one does not exist. + */ + @Nullable + public String getDataPlatformInstance() { + if (!fieldResolvers.containsKey(EntityFieldType.DATA_PLATFORM_INSTANCE)) { + return null; + } + Set dataPlatformInstance = fieldResolvers.get(EntityFieldType.DATA_PLATFORM_INSTANCE).getFieldValuesFuture().join().getValues(); + if (dataPlatformInstance.size() > 0) { + return dataPlatformInstance.stream().findFirst().get(); + } + return null; + } + + /** + * Fetch the group membership for an entity. + * @return a set of groups urns, or empty set if none exist. 
+ */ + public Set getGroupMembership() { + if (!fieldResolvers.containsKey(EntityFieldType.GROUP_MEMBERSHIP)) { + return Collections.emptySet(); + } + return fieldResolvers.get(EntityFieldType.GROUP_MEMBERSHIP).getFieldValuesFuture().join().getValues(); + } +} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java deleted file mode 100644 index 8e429a8ca1b94..0000000000000 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java +++ /dev/null @@ -1,55 +0,0 @@ -package com.datahub.authorization; - -import java.util.Collections; -import java.util.Map; -import java.util.Set; -import javax.annotation.Nullable; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.ToString; - - -/** - * Wrapper around authorization request with field resolvers for lazily fetching the field values for each field type - */ -@RequiredArgsConstructor -@ToString -public class ResolvedResourceSpec { - @Getter - private final ResourceSpec spec; - private final Map fieldResolvers; - - public Set getFieldValues(ResourceFieldType resourceFieldType) { - if (!fieldResolvers.containsKey(resourceFieldType)) { - return Collections.emptySet(); - } - return fieldResolvers.get(resourceFieldType).getFieldValuesFuture().join().getValues(); - } - - /** - * Fetch the owners for a resource. - * @return a set of owner urns, or empty set if none exist. - */ - public Set getOwners() { - if (!fieldResolvers.containsKey(ResourceFieldType.OWNER)) { - return Collections.emptySet(); - } - return fieldResolvers.get(ResourceFieldType.OWNER).getFieldValuesFuture().join().getValues(); - } - - /** - * Fetch the platform instance for a Resolved Resource Spec - * @return a Platform Instance or null if one does not exist. - */ - @Nullable - public String getDataPlatformInstance() { - if (!fieldResolvers.containsKey(ResourceFieldType.DATA_PLATFORM_INSTANCE)) { - return null; - } - Set dataPlatformInstance = fieldResolvers.get(ResourceFieldType.DATA_PLATFORM_INSTANCE).getFieldValuesFuture().join().getValues(); - if (dataPlatformInstance.size() > 0) { - return dataPlatformInstance.stream().findFirst().get(); - } - return null; - } -} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java deleted file mode 100644 index 478522dc7c331..0000000000000 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.datahub.authorization; - -/** - * List of resource field types to fetch for a given resource - */ -public enum ResourceFieldType { - /** - * Type of resource (e.g. 
dataset, chart) - */ - RESOURCE_TYPE, - /** - * Urn of resource - */ - RESOURCE_URN, - /** - * Owners of resource - */ - OWNER, - /** - * Domains of resource - */ - DOMAIN, - /** - * Data platform instance of resource - */ - DATA_PLATFORM_INSTANCE -} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpec.java deleted file mode 100644 index c1bd53e31fe29..0000000000000 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpec.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.datahub.authorization; - -import javax.annotation.Nonnull; -import lombok.Value; - - -/** - * Details about a specific resource being acted upon. Resource types currently supported - * can be found inside of {@link com.linkedin.metadata.authorization.PoliciesConfig} - */ -@Value -public class ResourceSpec { - /** - * The resource type. Most often, this corresponds to the entity type. (dataset, chart, dashboard, corpGroup, etc). - */ - @Nonnull - String type; - /** - * The resource identity. Most often, this corresponds to the raw entity urn. (urn:li:corpGroup:groupId) - */ - @Nonnull - String resource; -} \ No newline at end of file diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpecResolver.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpecResolver.java deleted file mode 100644 index 05c35f377b9a9..0000000000000 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpecResolver.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.datahub.authorization; - -/** - * A Resource Spec Resolver is responsible for resolving a {@link ResourceSpec} to a {@link ResolvedResourceSpec}. - */ -public interface ResourceSpecResolver { - /** - Resolve a {@link ResourceSpec} to a resolved resource spec. 
- **/ - ResolvedResourceSpec resolve(ResourceSpec resourceSpec); -} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/plugins/auth/authorization/Authorizer.java b/metadata-auth/auth-api/src/main/java/com/datahub/plugins/auth/authorization/Authorizer.java index ce7a3f22b3147..c731a3ec987c1 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/plugins/auth/authorization/Authorizer.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/plugins/auth/authorization/Authorizer.java @@ -4,7 +4,7 @@ import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.AuthorizedActors; import com.datahub.authorization.AuthorizerContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.Plugin; import java.util.Map; import java.util.Optional; @@ -32,5 +32,5 @@ public interface Authorizer extends Plugin { * Retrieves the current list of actors authorized to for a particular privilege against * an optional resource */ - AuthorizedActors authorizedActors(final String privilege, final Optional resourceSpec); + AuthorizedActors authorizedActors(final String privilege, final Optional resourceSpec); } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java index d62c37160f816..f8eca541e1efb 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java @@ -82,7 +82,7 @@ public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request } @Override - public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) { + public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) { if (this.authorizers.isEmpty()) { return null; } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java index f653ccf72cf54..4553139e3ca54 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java @@ -8,6 +8,8 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.policy.DataHubPolicyInfo; + +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -55,7 +57,7 @@ public enum AuthorizationMode { private final ScheduledExecutorService _refreshExecutorService = Executors.newScheduledThreadPool(1); private final PolicyRefreshRunnable _policyRefreshRunnable; private final PolicyEngine _policyEngine; - private ResourceSpecResolver _resourceSpecResolver; + private EntitySpecResolver _entitySpecResolver; private AuthorizationMode _mode; public static final String ALL = "ALL"; @@ -76,7 +78,7 @@ public DataHubAuthorizer( @Override public void init(@Nonnull Map authorizerConfig, @Nonnull AuthorizerContext ctx) { // Pass. No static config. 
- _resourceSpecResolver = Objects.requireNonNull(ctx.getResourceSpecResolver()); + _entitySpecResolver = Objects.requireNonNull(ctx.getEntitySpecResolver()); } public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request) { @@ -86,7 +88,7 @@ public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW, null); } - Optional resolvedResourceSpec = request.getResourceSpec().map(_resourceSpecResolver::resolve); + Optional resolvedResourceSpec = request.getResourceSpec().map(_entitySpecResolver::resolve); // 1. Fetch the policies relevant to the requested privilege. final List policiesToEvaluate = _policyCache.getOrDefault(request.getPrivilege(), new ArrayList<>()); @@ -102,14 +104,17 @@ public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request return new AuthorizationResult(request, AuthorizationResult.Type.DENY, null); } - public List getGrantedPrivileges(final String actorUrn, final Optional resourceSpec) { + public List getGrantedPrivileges(final String actor, final Optional resourceSpec) { // 1. Fetch all policies final List policiesToEvaluate = _policyCache.getOrDefault(ALL, new ArrayList<>()); - Optional resolvedResourceSpec = resourceSpec.map(_resourceSpecResolver::resolve); + Urn actorUrn = UrnUtils.getUrn(actor); + final ResolvedEntitySpec resolvedActorSpec = _entitySpecResolver.resolve(new EntitySpec(actorUrn.getEntityType(), actor)); + + Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve); - return _policyEngine.getGrantedPrivileges(policiesToEvaluate, UrnUtils.getUrn(actorUrn), resolvedResourceSpec); + return _policyEngine.getGrantedPrivileges(policiesToEvaluate, resolvedActorSpec, resolvedResourceSpec); } /** @@ -118,11 +123,11 @@ public List getGrantedPrivileges(final String actorUrn, final Optional resourceSpec) { + final Optional resourceSpec) { // Step 1: Find policies granting the privilege. final List policiesToEvaluate = _policyCache.getOrDefault(privilege, new ArrayList<>()); - Optional resolvedResourceSpec = resourceSpec.map(_resourceSpecResolver::resolve); + Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve); final List authorizedUsers = new ArrayList<>(); final List authorizedGroups = new ArrayList<>(); @@ -180,19 +185,36 @@ private boolean isSystemRequest(final AuthorizationRequest request, final Authen /** * Returns true if a policy grants the requested privilege for a given actor and resource. 
*/ - private boolean isRequestGranted(final DataHubPolicyInfo policy, final AuthorizationRequest request, final Optional resourceSpec) { + private boolean isRequestGranted(final DataHubPolicyInfo policy, final AuthorizationRequest request, final Optional resourceSpec) { if (AuthorizationMode.ALLOW_ALL.equals(mode())) { return true; } + + Optional actorUrn = getUrnFromRequestActor(request.getActorUrn()); + if (actorUrn.isEmpty()) { + return false; + } + + final ResolvedEntitySpec resolvedActorSpec = _entitySpecResolver.resolve( + new EntitySpec(actorUrn.get().getEntityType(), request.getActorUrn())); final PolicyEngine.PolicyEvaluationResult result = _policyEngine.evaluatePolicy( policy, - request.getActorUrn(), + resolvedActorSpec, request.getPrivilege(), resourceSpec ); return result.isGranted(); } + private Optional getUrnFromRequestActor(String actor) { + try { + return Optional.of(Urn.createFromString(actor)); + } catch (URISyntaxException e) { + log.error(String.format("Failed to bind actor %s to an URN. Actors must be URNs. Denying the authorization request", actor)); + return Optional.empty(); + } + } + /** * A {@link Runnable} used to periodically fetch a new instance of the policies Cache. * diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java similarity index 51% rename from metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java rename to metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java index 64c43dc8aa591..4ad14ed59c9c0 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java @@ -1,39 +1,40 @@ package com.datahub.authorization; -import com.datahub.authentication.Authentication; import com.datahub.authorization.fieldresolverprovider.DataPlatformInstanceFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.DomainFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.ResourceFieldResolverProvider; +import com.datahub.authentication.Authentication; +import com.datahub.authorization.fieldresolverprovider.DomainFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.GroupMembershipFieldResolverProvider; import com.google.common.collect.ImmutableList; import com.linkedin.entity.client.EntityClient; - import java.util.List; import java.util.Map; import java.util.stream.Collectors; -public class DefaultResourceSpecResolver implements ResourceSpecResolver { - private final List _resourceFieldResolverProviders; +public class DefaultEntitySpecResolver implements EntitySpecResolver { + private final List _entityFieldResolverProviders; - public DefaultResourceSpecResolver(Authentication systemAuthentication, EntityClient entityClient) { - _resourceFieldResolverProviders = + public 
DefaultEntitySpecResolver(Authentication systemAuthentication, EntityClient entityClient) { + _entityFieldResolverProviders = ImmutableList.of(new EntityTypeFieldResolverProvider(), new EntityUrnFieldResolverProvider(), new DomainFieldResolverProvider(entityClient, systemAuthentication), new OwnerFieldResolverProvider(entityClient, systemAuthentication), - new DataPlatformInstanceFieldResolverProvider(entityClient, systemAuthentication)); + new DataPlatformInstanceFieldResolverProvider(entityClient, systemAuthentication), + new GroupMembershipFieldResolverProvider(entityClient, systemAuthentication)); } @Override - public ResolvedResourceSpec resolve(ResourceSpec resourceSpec) { - return new ResolvedResourceSpec(resourceSpec, getFieldResolvers(resourceSpec)); + public ResolvedEntitySpec resolve(EntitySpec entitySpec) { + return new ResolvedEntitySpec(entitySpec, getFieldResolvers(entitySpec)); } - private Map getFieldResolvers(ResourceSpec resourceSpec) { - return _resourceFieldResolverProviders.stream() - .collect(Collectors.toMap(ResourceFieldResolverProvider::getFieldType, - hydrator -> hydrator.getFieldResolver(resourceSpec))); + private Map getFieldResolvers(EntitySpec entitySpec) { + return _entityFieldResolverProviders.stream() + .collect(Collectors.toMap(EntityFieldResolverProvider::getFieldType, + hydrator -> hydrator.getFieldResolver(entitySpec))); } } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/FilterUtils.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/FilterUtils.java index 76ed18e2baf78..0dbb9cd132f8a 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/FilterUtils.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/FilterUtils.java @@ -26,7 +26,7 @@ private FilterUtils() { * Creates new PolicyMatchCriterion with field and value, using EQUAL PolicyMatchCondition. */ @Nonnull - public static PolicyMatchCriterion newCriterion(@Nonnull ResourceFieldType field, @Nonnull List values) { + public static PolicyMatchCriterion newCriterion(@Nonnull EntityFieldType field, @Nonnull List values) { return newCriterion(field, values, PolicyMatchCondition.EQUALS); } @@ -34,7 +34,7 @@ public static PolicyMatchCriterion newCriterion(@Nonnull ResourceFieldType field * Creates new PolicyMatchCriterion with field, value and PolicyMatchCondition. */ @Nonnull - public static PolicyMatchCriterion newCriterion(@Nonnull ResourceFieldType field, @Nonnull List values, + public static PolicyMatchCriterion newCriterion(@Nonnull EntityFieldType field, @Nonnull List values, @Nonnull PolicyMatchCondition policyMatchCondition) { return new PolicyMatchCriterion().setField(field.name()) .setValues(new StringArray(values)) @@ -45,7 +45,7 @@ public static PolicyMatchCriterion newCriterion(@Nonnull ResourceFieldType field * Creates new PolicyMatchFilter from a map of Criteria by removing null-valued Criteria and using EQUAL PolicyMatchCondition (default). 
*/ @Nonnull - public static PolicyMatchFilter newFilter(@Nullable Map> params) { + public static PolicyMatchFilter newFilter(@Nullable Map> params) { if (params == null) { return EMPTY_FILTER; } @@ -61,7 +61,7 @@ public static PolicyMatchFilter newFilter(@Nullable Map values) { + public static PolicyMatchFilter newFilter(@Nonnull EntityFieldType field, @Nonnull List values) { return newFilter(Collections.singletonMap(field, values)); } } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java index 6a36fac7de4e0..f8c017ea74e1f 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java @@ -1,7 +1,6 @@ package com.datahub.authorization; import com.datahub.authentication.Authentication; -import com.google.common.collect.ImmutableSet; import com.linkedin.common.Owner; import com.linkedin.common.Ownership; import com.linkedin.common.urn.Urn; @@ -11,8 +10,6 @@ import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.EntityClient; -import com.linkedin.identity.GroupMembership; -import com.linkedin.identity.NativeGroupMembership; import com.linkedin.identity.RoleMembership; import com.linkedin.metadata.Constants; import com.linkedin.metadata.authorization.PoliciesConfig; @@ -23,7 +20,7 @@ import com.linkedin.policy.PolicyMatchCriterion; import com.linkedin.policy.PolicyMatchCriterionArray; import com.linkedin.policy.PolicyMatchFilter; -import java.net.URISyntaxException; + import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; @@ -34,6 +31,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nullable; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -49,37 +47,22 @@ public class PolicyEngine { public PolicyEvaluationResult evaluatePolicy( final DataHubPolicyInfo policy, - final String actorStr, + final ResolvedEntitySpec resolvedActorSpec, final String privilege, - final Optional resource) { - try { - // Currently Actor must be an urn. Consider whether this contract should be pushed up. - final Urn actor = Urn.createFromString(actorStr); - return evaluatePolicy(policy, actor, privilege, resource); - } catch (URISyntaxException e) { - log.error(String.format("Failed to bind actor %s to an URN. Actors must be URNs. Denying the authorization request", actorStr)); - return PolicyEvaluationResult.DENIED; - } - } - - public PolicyEvaluationResult evaluatePolicy( - final DataHubPolicyInfo policy, - final Urn actor, - final String privilege, - final Optional resource) { + final Optional resource) { final PolicyEvaluationContext context = new PolicyEvaluationContext(); log.debug("Evaluating policy {}", policy.getDisplayName()); // If the privilege is not in scope, deny the request. 
- if (!isPrivilegeMatch(privilege, policy.getPrivileges(), context)) { + if (!isPrivilegeMatch(privilege, policy.getPrivileges())) { log.debug("Policy denied based on irrelevant privileges {} for {}", policy.getPrivileges(), privilege); return PolicyEvaluationResult.DENIED; } // If policy is not applicable, deny the request - if (!isPolicyApplicable(policy, actor, resource, context)) { - log.debug("Policy does not applicable for actor {} and resource {}", actor, resource); + if (!isPolicyApplicable(policy, resolvedActorSpec, resource, context)) { + log.debug("Policy is not applicable for actor {} and resource {}", resolvedActorSpec.getSpec().getEntity(), resource); return PolicyEvaluationResult.DENIED; } @@ -89,7 +72,7 @@ public PolicyEvaluationResult evaluatePolicy( public PolicyActors getMatchingActors( final DataHubPolicyInfo policy, - final Optional resource) { + final Optional resource) { final List users = new ArrayList<>(); final List groups = new ArrayList<>(); boolean allUsers = false; @@ -126,8 +109,8 @@ public PolicyActors getMatchingActors( private boolean isPolicyApplicable( final DataHubPolicyInfo policy, - final Urn actor, - final Optional resource, + final ResolvedEntitySpec resolvedActorSpec, + final Optional resource, final PolicyEvaluationContext context ) { @@ -137,25 +120,21 @@ private boolean isPolicyApplicable( } // If the resource is not in scope, deny the request. - if (!isResourceMatch(policy.getType(), policy.getResources(), resource, context)) { + if (!isResourceMatch(policy.getType(), policy.getResources(), resource)) { return false; } // If the actor does not match, deny the request. - if (!isActorMatch(actor, policy.getActors(), resource, context)) { - return false; - } - - return true; + return isActorMatch(resolvedActorSpec, policy.getActors(), resource, context); } public List getGrantedPrivileges( final List policies, - final Urn actor, - final Optional resource) { + final ResolvedEntitySpec resolvedActorSpec, + final Optional resource) { PolicyEvaluationContext context = new PolicyEvaluationContext(); return policies.stream() - .filter(policy -> isPolicyApplicable(policy, actor, resource, context)) + .filter(policy -> isPolicyApplicable(policy, resolvedActorSpec, resource, context)) .flatMap(policy -> policy.getPrivileges().stream()) .distinct() .collect(Collectors.toList()); @@ -168,9 +147,8 @@ public List getGrantedPrivileges( * If the policy is of type "METADATA", the resourceSpec parameter will be matched against the * resource filter defined on the policy.
*/ - public Boolean policyMatchesResource(final DataHubPolicyInfo policy, final Optional resourceSpec) { - return isResourceMatch(policy.getType(), policy.getResources(), resourceSpec, - new PolicyEvaluationContext()); + public Boolean policyMatchesResource(final DataHubPolicyInfo policy, final Optional resourceSpec) { + return isResourceMatch(policy.getType(), policy.getResources(), resourceSpec); } /** @@ -178,8 +156,7 @@ public Boolean policyMatchesResource(final DataHubPolicyInfo policy, final Optio */ private boolean isPrivilegeMatch( final String requestPrivilege, - final List policyPrivileges, - final PolicyEvaluationContext context) { + final List policyPrivileges) { return policyPrivileges.contains(requestPrivilege); } @@ -189,8 +166,7 @@ private boolean isPrivilegeMatch( private boolean isResourceMatch( final String policyType, final @Nullable DataHubResourceFilter policyResourceFilter, - final Optional requestResource, - final PolicyEvaluationContext context) { + final Optional requestResource) { if (PoliciesConfig.PLATFORM_POLICY_TYPE.equals(policyType)) { // Currently, platform policies have no associated resource. return true; @@ -199,7 +175,7 @@ private boolean isResourceMatch( // No resource defined on the policy. return true; } - if (!requestResource.isPresent()) { + if (requestResource.isEmpty()) { // Resource filter present in policy, but no resource spec provided. log.debug("Resource filter present in policy, but no resource spec provided."); return false; @@ -218,31 +194,31 @@ private PolicyMatchFilter getFilter(DataHubResourceFilter policyResourceFilter) } PolicyMatchCriterionArray criteria = new PolicyMatchCriterionArray(); if (policyResourceFilter.hasType()) { - criteria.add(new PolicyMatchCriterion().setField(ResourceFieldType.RESOURCE_TYPE.name()) + criteria.add(new PolicyMatchCriterion().setField(EntityFieldType.TYPE.name()) .setValues(new StringArray(Collections.singletonList(policyResourceFilter.getType())))); } if (policyResourceFilter.hasType() && policyResourceFilter.hasResources() && !policyResourceFilter.isAllResources()) { criteria.add( - new PolicyMatchCriterion().setField(ResourceFieldType.RESOURCE_URN.name()).setValues(policyResourceFilter.getResources())); + new PolicyMatchCriterion().setField(EntityFieldType.URN.name()).setValues(policyResourceFilter.getResources())); } return new PolicyMatchFilter().setCriteria(criteria); } - private boolean checkFilter(final PolicyMatchFilter filter, final ResolvedResourceSpec resource) { + private boolean checkFilter(final PolicyMatchFilter filter, final ResolvedEntitySpec resource) { return filter.getCriteria().stream().allMatch(criterion -> checkCriterion(criterion, resource)); } - private boolean checkCriterion(final PolicyMatchCriterion criterion, final ResolvedResourceSpec resource) { - ResourceFieldType resourceFieldType; + private boolean checkCriterion(final PolicyMatchCriterion criterion, final ResolvedEntitySpec resource) { + EntityFieldType entityFieldType; try { - resourceFieldType = ResourceFieldType.valueOf(criterion.getField().toUpperCase()); + entityFieldType = EntityFieldType.valueOf(criterion.getField().toUpperCase()); } catch (IllegalArgumentException e) { log.error("Unsupported field type {}", criterion.getField()); return false; } - Set fieldValues = resource.getFieldValues(resourceFieldType); + Set fieldValues = resource.getFieldValues(entityFieldType); return criterion.getValues() .stream() .anyMatch(filterValue -> checkCondition(fieldValues, filterValue, criterion.getCondition())); @@ 
-257,46 +233,51 @@ private boolean checkCondition(Set fieldValues, String filterValue, Poli } /** * Returns true if the actor portion of a DataHub policy matches the actor being evaluated, false otherwise. */ private boolean isActorMatch( - final Urn actor, + final ResolvedEntitySpec resolvedActorSpec, final DataHubActorFilter actorFilter, - final Optional resourceSpec, + final Optional resourceSpec, final PolicyEvaluationContext context) { // 1. If the actor is a matching "User" in the actor filter, return true immediately. - if (isUserMatch(actor, actorFilter)) { + if (isUserMatch(resolvedActorSpec, actorFilter)) { return true; } // 2. If the actor is in a matching "Group" in the actor filter, return true immediately. - if (isGroupMatch(actor, actorFilter, context)) { + if (isGroupMatch(resolvedActorSpec, actorFilter, context)) { return true; } // 3. If the actor is the owner, either directly or indirectly via a group, return true immediately. - if (isOwnerMatch(actor, actorFilter, resourceSpec, context)) { + if (isOwnerMatch(resolvedActorSpec, actorFilter, resourceSpec, context)) { return true; } // 4. If the actor is in a matching "Role" in the actor filter, return true immediately. - return isRoleMatch(actor, actorFilter, context); + return isRoleMatch(resolvedActorSpec, actorFilter, context); } - private boolean isUserMatch(final Urn actor, final DataHubActorFilter actorFilter) { + private boolean isUserMatch(final ResolvedEntitySpec resolvedActorSpec, final DataHubActorFilter actorFilter) { // If the actor is a matching "User" in the actor filter, return true immediately. return actorFilter.isAllUsers() || (actorFilter.hasUsers() && Objects.requireNonNull(actorFilter.getUsers()) - .stream() - .anyMatch(user -> user.equals(actor))); + .stream().map(Urn::toString) + .anyMatch(user -> user.equals(resolvedActorSpec.getSpec().getEntity()))); } - private boolean isGroupMatch(final Urn actor, final DataHubActorFilter actorFilter, final PolicyEvaluationContext context) { + private boolean isGroupMatch( + final ResolvedEntitySpec resolvedActorSpec, + final DataHubActorFilter actorFilter, + final PolicyEvaluationContext context) { // If the actor is in a matching "Group" in the actor filter, return true immediately. if (actorFilter.isAllGroups() || actorFilter.hasGroups()) { - final Set groups = resolveGroups(actor, context); - return actorFilter.isAllGroups() || (actorFilter.hasGroups() && Objects.requireNonNull(actorFilter.getGroups()) - .stream() + final Set groups = resolveGroups(resolvedActorSpec, context); + return (actorFilter.isAllGroups() && !groups.isEmpty()) + || (actorFilter.hasGroups() && Objects.requireNonNull(actorFilter.getGroups()) + .stream().map(Urn::toString) .anyMatch(groups::contains)); } // If there are no groups on the policy, return false for the group match. @@ -304,24 +285,24 @@ private boolean isGroupMatch(final Urn actor, final DataHubActorFilt } private boolean isOwnerMatch( - final Urn actor, + final ResolvedEntitySpec resolvedActorSpec, final DataHubActorFilter actorFilter, - final Optional requestResource, + final Optional requestResource, final PolicyEvaluationContext context) { // If the policy does not apply to owners, or there is no resource to own, return false immediately.
- if (!actorFilter.isResourceOwners() || !requestResource.isPresent()) { + if (!actorFilter.isResourceOwners() || requestResource.isEmpty()) { return false; } List ownershipTypes = actorFilter.getResourceOwnersTypes(); - return isActorOwner(actor, requestResource.get(), ownershipTypes, context); + return isActorOwner(resolvedActorSpec, requestResource.get(), ownershipTypes, context); } - private Set getOwnersForType(ResourceSpec resourceSpec, List ownershipTypes) { - Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource()); + private Set getOwnersForType(EntitySpec resourceSpec, List ownershipTypes) { + Urn entityUrn = UrnUtils.getUrn(resourceSpec.getEntity()); EnvelopedAspect ownershipAspect; try { EntityResponse response = _entityClient.getV2(entityUrn.getEntityType(), entityUrn, - Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME), _systemAuthentication); + Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME), _systemAuthentication); if (response == null || !response.getAspects().containsKey(Constants.OWNERSHIP_ASPECT_NAME)) { return Collections.emptySet(); } @@ -338,50 +319,56 @@ private Set getOwnersForType(ResourceSpec resourceSpec, List owners return ownersStream.map(owner -> owner.getOwner().toString()).collect(Collectors.toSet()); } - private boolean isActorOwner(Urn actor, ResolvedResourceSpec resourceSpec, List ownershipTypes, PolicyEvaluationContext context) { + private boolean isActorOwner( + final ResolvedEntitySpec resolvedActorSpec, + ResolvedEntitySpec resourceSpec, List ownershipTypes, + PolicyEvaluationContext context) { Set owners = this.getOwnersForType(resourceSpec.getSpec(), ownershipTypes); - if (isUserOwner(actor, owners)) { - return true; - } - final Set groups = resolveGroups(actor, context); - if (isGroupOwner(groups, owners)) { + if (isUserOwner(resolvedActorSpec, owners)) { return true; } - return false; + final Set groups = resolveGroups(resolvedActorSpec, context); + + return isGroupOwner(groups, owners); } - private boolean isUserOwner(Urn actor, Set owners) { - return owners.contains(actor.toString()); + private boolean isUserOwner(final ResolvedEntitySpec resolvedActorSpec, Set owners) { + return owners.contains(resolvedActorSpec.getSpec().getEntity()); } - private boolean isGroupOwner(Set groups, Set owners) { - return groups.stream().anyMatch(group -> owners.contains(group.toString())); + private boolean isGroupOwner(Set groups, Set owners) { + return groups.stream().anyMatch(owners::contains); } - private boolean isRoleMatch(final Urn actor, final DataHubActorFilter actorFilter, + private boolean isRoleMatch( + final ResolvedEntitySpec resolvedActorSpec, + final DataHubActorFilter actorFilter, final PolicyEvaluationContext context) { // Can immediately return false if the actor filter does not have any roles if (!actorFilter.hasRoles()) { return false; } // If the actor has a matching "Role" in the actor filter, return true immediately. 
- Set actorRoles = resolveRoles(actor, context); + Set actorRoles = resolveRoles(resolvedActorSpec, context); return Objects.requireNonNull(actorFilter.getRoles()) .stream() .anyMatch(actorRoles::contains); } - private Set resolveRoles(Urn actor, PolicyEvaluationContext context) { + private Set resolveRoles(final ResolvedEntitySpec resolvedActorSpec, PolicyEvaluationContext context) { if (context.roles != null) { return context.roles; } + String actor = resolvedActorSpec.getSpec().getEntity(); + Set roles = new HashSet<>(); final EnvelopedAspectMap aspectMap; try { - final EntityResponse corpUser = _entityClient.batchGetV2(CORP_USER_ENTITY_NAME, Collections.singleton(actor), - Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME), _systemAuthentication).get(actor); + Urn actorUrn = Urn.createFromString(actor); + final EntityResponse corpUser = _entityClient.batchGetV2(CORP_USER_ENTITY_NAME, Collections.singleton(actorUrn), + Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME), _systemAuthentication).get(actorUrn); if (corpUser == null || !corpUser.hasAspects()) { return roles; } @@ -403,62 +390,25 @@ private Set resolveRoles(Urn actor, PolicyEvaluationContext context) { return roles; } - private Set resolveGroups(Urn actor, PolicyEvaluationContext context) { + private Set resolveGroups(ResolvedEntitySpec resolvedActorSpec, PolicyEvaluationContext context) { if (context.groups != null) { return context.groups; } - Set groups = new HashSet<>(); - final EnvelopedAspectMap aspectMap; - - try { - final EntityResponse corpUser = _entityClient.batchGetV2(CORP_USER_ENTITY_NAME, Collections.singleton(actor), - ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME), _systemAuthentication) - .get(actor); - if (corpUser == null || !corpUser.hasAspects()) { - return groups; - } - aspectMap = corpUser.getAspects(); - } catch (Exception e) { - throw new RuntimeException(String.format("Failed to fetch %s and %s for urn %s", GROUP_MEMBERSHIP_ASPECT_NAME, - NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME, actor), e); - } - - Optional maybeGroupMembership = resolveGroupMembership(aspectMap); - maybeGroupMembership.ifPresent(groupMembership -> groups.addAll(groupMembership.getGroups())); - - Optional maybeNativeGroupMembership = resolveNativeGroupMembership(aspectMap); - maybeNativeGroupMembership.ifPresent( - nativeGroupMembership -> groups.addAll(nativeGroupMembership.getNativeGroups())); + Set groups = resolvedActorSpec.getGroupMembership(); context.setGroups(groups); // Cache the groups. return groups; } - // TODO: Optimization - Cache the group membership. Refresh periodically. - private Optional resolveGroupMembership(final EnvelopedAspectMap aspectMap) { - if (aspectMap.containsKey(GROUP_MEMBERSHIP_ASPECT_NAME)) { - return Optional.of(new GroupMembership(aspectMap.get(GROUP_MEMBERSHIP_ASPECT_NAME).getValue().data())); - } - return Optional.empty(); - } - - private Optional resolveNativeGroupMembership(final EnvelopedAspectMap aspectMap) { - if (aspectMap.containsKey(NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)) { - return Optional.of( - new NativeGroupMembership(aspectMap.get(NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME).getValue().data())); - } - return Optional.empty(); - } - /** * Class used to store state across a single Policy evaluation. 
*/ static class PolicyEvaluationContext { - private Set groups; + private Set groups; private Set roles; - public void setGroups(Set groups) { + public void setGroups(Set groups) { this.groups = groups; } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java index cd838625c2ca1..27cb8fcee8138 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java @@ -1,45 +1,45 @@ package com.datahub.authorization.fieldresolverprovider; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; + import com.datahub.authentication.Authentication; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; import com.linkedin.common.DataPlatformInstance; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.client.EntityClient; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; - import java.util.Collections; import java.util.Objects; - -import static com.linkedin.metadata.Constants.*; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; /** * Provides field resolver for domain given resourceSpec */ @Slf4j @RequiredArgsConstructor -public class DataPlatformInstanceFieldResolverProvider implements ResourceFieldResolverProvider { +public class DataPlatformInstanceFieldResolverProvider implements EntityFieldResolverProvider { private final EntityClient _entityClient; private final Authentication _systemAuthentication; @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.DATA_PLATFORM_INSTANCE; + public EntityFieldType getFieldType() { + return EntityFieldType.DATA_PLATFORM_INSTANCE; } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromFunction(resourceSpec, this::getDataPlatformInstance); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromFunction(entitySpec, this::getDataPlatformInstance); } - private FieldResolver.FieldValue getDataPlatformInstance(ResourceSpec resourceSpec) { - Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource()); + private FieldResolver.FieldValue getDataPlatformInstance(EntitySpec entitySpec) { + Urn entityUrn = UrnUtils.getUrn(entitySpec.getEntity()); // In the case that the entity is a platform instance, the associated platform instance entity is the instance itself if (entityUrn.getEntityType().equals(DATA_PLATFORM_INSTANCE_ENTITY_NAME)) { return FieldResolver.FieldValue.builder() diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java 
b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java index 68c1dd4f644e5..25c2165f02b94 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java @@ -2,8 +2,8 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.domain.DomainProperties; @@ -27,23 +27,23 @@ /** - * Provides field resolver for domain given resourceSpec + * Provides field resolver for domain given entitySpec */ @Slf4j @RequiredArgsConstructor -public class DomainFieldResolverProvider implements ResourceFieldResolverProvider { +public class DomainFieldResolverProvider implements EntityFieldResolverProvider { private final EntityClient _entityClient; private final Authentication _systemAuthentication; @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.DOMAIN; + public EntityFieldType getFieldType() { + return EntityFieldType.DOMAIN; } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromFunction(resourceSpec, this::getDomains); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromFunction(entitySpec, this::getDomains); } private Set getBatchedParentDomains(@Nonnull final Set urns) { @@ -78,8 +78,8 @@ private Set getBatchedParentDomains(@Nonnull final Set urns) { return parentUrns; } - private FieldResolver.FieldValue getDomains(ResourceSpec resourceSpec) { - final Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource()); + private FieldResolver.FieldValue getDomains(EntitySpec entitySpec) { + final Urn entityUrn = UrnUtils.getUrn(entitySpec.getEntity()); // In the case that the entity is a domain, the associated domain is the domain itself if (entityUrn.getEntityType().equals(DOMAIN_ENTITY_NAME)) { return FieldResolver.FieldValue.builder() diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java new file mode 100644 index 0000000000000..a76db0ecb5102 --- /dev/null +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java @@ -0,0 +1,22 @@ +package com.datahub.authorization.fieldresolverprovider; + +import com.datahub.authorization.FieldResolver; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; + + +/** + * Base class for defining a class that provides the field resolver for the given field type + */ +public interface EntityFieldResolverProvider { + + /** + * Field that this hydrator is hydrating + */ + EntityFieldType getFieldType(); + + /** + * Return resolver for fetching the field values given the entity + */ + FieldResolver getFieldResolver(EntitySpec entitySpec); +} diff --git 
a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java index 58e3d78ce8c3b..187f696904947 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java @@ -1,22 +1,22 @@ package com.datahub.authorization.fieldresolverprovider; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import java.util.Collections; /** - * Provides field resolver for entity type given resourceSpec + * Provides field resolver for entity type given entitySpec */ -public class EntityTypeFieldResolverProvider implements ResourceFieldResolverProvider { +public class EntityTypeFieldResolverProvider implements EntityFieldResolverProvider { @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.RESOURCE_TYPE; + public EntityFieldType getFieldType() { + return EntityFieldType.TYPE; } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromValues(Collections.singleton(resourceSpec.getType())); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromValues(Collections.singleton(entitySpec.getType())); } } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java index b9d98f1dcbac0..2f5c4a7c6c961 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java @@ -1,22 +1,22 @@ package com.datahub.authorization.fieldresolverprovider; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import java.util.Collections; /** - * Provides field resolver for entity urn given resourceSpec + * Provides field resolver for entity urn given entitySpec */ -public class EntityUrnFieldResolverProvider implements ResourceFieldResolverProvider { +public class EntityUrnFieldResolverProvider implements EntityFieldResolverProvider { @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.RESOURCE_URN; + public EntityFieldType getFieldType() { + return EntityFieldType.URN; } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromValues(Collections.singleton(resourceSpec.getResource())); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromValues(Collections.singleton(entitySpec.getEntity())); } } diff --git 
a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java new file mode 100644 index 0000000000000..8db029632d7e2 --- /dev/null +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java @@ -0,0 +1,78 @@ +package com.datahub.authorization.fieldresolverprovider; + +import com.datahub.authentication.Authentication; +import com.datahub.authorization.FieldResolver; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.identity.NativeGroupMembership; +import com.linkedin.metadata.Constants; +import com.linkedin.identity.GroupMembership; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import static com.linkedin.metadata.Constants.GROUP_MEMBERSHIP_ASPECT_NAME; +import static com.linkedin.metadata.Constants.NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME; + + +/** + * Provides field resolver for group membership given entitySpec + */ +@Slf4j +@RequiredArgsConstructor +public class GroupMembershipFieldResolverProvider implements EntityFieldResolverProvider { + + private final EntityClient _entityClient; + private final Authentication _systemAuthentication; + + @Override + public EntityFieldType getFieldType() { + return EntityFieldType.GROUP_MEMBERSHIP; + } + + @Override + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromFunction(entitySpec, this::getGroupMembership); + } + + private FieldResolver.FieldValue getGroupMembership(EntitySpec entitySpec) { + Urn entityUrn = UrnUtils.getUrn(entitySpec.getEntity()); + EnvelopedAspect groupMembershipAspect; + EnvelopedAspect nativeGroupMembershipAspect; + List groups = new ArrayList<>(); + try { + EntityResponse response = _entityClient.getV2(entityUrn.getEntityType(), entityUrn, + ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME), _systemAuthentication); + if (response == null + || !(response.getAspects().containsKey(Constants.GROUP_MEMBERSHIP_ASPECT_NAME) + || response.getAspects().containsKey(Constants.NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME))) { + return FieldResolver.emptyFieldValue(); + } + if (response.getAspects().containsKey(Constants.GROUP_MEMBERSHIP_ASPECT_NAME)) { + groupMembershipAspect = response.getAspects().get(Constants.GROUP_MEMBERSHIP_ASPECT_NAME); + GroupMembership groupMembership = new GroupMembership(groupMembershipAspect.getValue().data()); + groups.addAll(groupMembership.getGroups()); + } + if (response.getAspects().containsKey(Constants.NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)) { + nativeGroupMembershipAspect = response.getAspects().get(Constants.NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME); + NativeGroupMembership nativeGroupMembership = new NativeGroupMembership(nativeGroupMembershipAspect.getValue().data()); + groups.addAll(nativeGroupMembership.getNativeGroups()); + } + } catch (Exception e) { + log.error("Error while retrieving group membership aspect for
urn {}", entityUrn, e); + return FieldResolver.emptyFieldValue(); + } + return FieldResolver.FieldValue.builder() + .values(groups.stream().map(Urn::toString).collect(Collectors.toSet())) + .build(); + } +} diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java index 20ec6a09377c8..bdd652d1d3871 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java @@ -2,8 +2,8 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.Ownership; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -18,27 +18,27 @@ /** - * Provides field resolver for owners given resourceSpec + * Provides field resolver for owners given entitySpec */ @Slf4j @RequiredArgsConstructor -public class OwnerFieldResolverProvider implements ResourceFieldResolverProvider { +public class OwnerFieldResolverProvider implements EntityFieldResolverProvider { private final EntityClient _entityClient; private final Authentication _systemAuthentication; @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.OWNER; + public EntityFieldType getFieldType() { + return EntityFieldType.OWNER; } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromFunction(resourceSpec, this::getOwners); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromFunction(entitySpec, this::getOwners); } - private FieldResolver.FieldValue getOwners(ResourceSpec resourceSpec) { - Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource()); + private FieldResolver.FieldValue getOwners(EntitySpec entitySpec) { + Urn entityUrn = UrnUtils.getUrn(entitySpec.getEntity()); EnvelopedAspect ownershipAspect; try { EntityResponse response = _entityClient.getV2(entityUrn.getEntityType(), entityUrn, diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/ResourceFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/ResourceFieldResolverProvider.java deleted file mode 100644 index 4ba4200f8035e..0000000000000 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/ResourceFieldResolverProvider.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.datahub.authorization.fieldresolverprovider; - -import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; - - -/** - * Base class for defining a class that provides the field resolver for the given field type - */ -public interface ResourceFieldResolverProvider { - - /** - * Field that this hydrator is hydrating - */ - ResourceFieldType getFieldType(); - - /** - * Return resolver for fetching the field values given the resource - */ - FieldResolver getFieldResolver(ResourceSpec 
resourceSpec); -} diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java index 2e48123fb1813..24ecfa6fefc85 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java @@ -158,7 +158,7 @@ public void testSystemAuthentication() throws Exception { // Validate that the System Actor is authorized, even if there is no policy. - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( new Actor(ActorType.USER, DATAHUB_SYSTEM_CLIENT_ID).toUrnStr(), @@ -172,7 +172,7 @@ public void testSystemAuthentication() throws Exception { @Test public void testAuthorizeGranted() throws Exception { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -186,7 +186,7 @@ public void testAuthorizeGranted() throws Exception { @Test public void testAuthorizeNotGranted() throws Exception { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); // Policy for this privilege is inactive. AuthorizationRequest request = new AuthorizationRequest( @@ -203,7 +203,7 @@ public void testAllowAllMode() throws Exception { _dataHubAuthorizer.setMode(DataHubAuthorizer.AuthorizationMode.ALLOW_ALL); - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); // Policy for this privilege is inactive. AuthorizationRequest request = new AuthorizationRequest( @@ -219,7 +219,7 @@ public void testAllowAllMode() throws Exception { public void testInvalidateCache() throws Exception { // First make sure that the default policies are as expected. - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -250,7 +250,7 @@ public void testInvalidateCache() throws Exception { public void testAuthorizedActorsActivePolicy() throws Exception { final AuthorizedActors actors = _dataHubAuthorizer.authorizedActors("EDIT_ENTITY_TAGS", // Should be inside the active policy. 
- Optional.of(new ResourceSpec("dataset", "urn:li:dataset:1"))); + Optional.of(new EntitySpec("dataset", "urn:li:dataset:1"))); assertTrue(actors.isAllUsers()); assertTrue(actors.isAllGroups()); @@ -272,7 +272,7 @@ public void testAuthorizedActorsActivePolicy() throws Exception { @Test public void testAuthorizationOnDomainWithPrivilegeIsAllowed() { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -285,7 +285,7 @@ public void testAuthorizationOnDomainWithPrivilegeIsAllowed() { @Test public void testAuthorizationOnDomainWithParentPrivilegeIsAllowed() { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -298,7 +298,7 @@ public void testAuthorizationOnDomainWithParentPrivilegeIsAllowed() { @Test public void testAuthorizationOnDomainWithoutPrivilegeIsDenied() { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -334,7 +334,7 @@ private DataHubPolicyInfo createDataHubPolicyInfo(boolean active, List p resourceFilter.setType("dataset"); if (domain != null) { - resourceFilter.setFilter(FilterUtils.newFilter(ImmutableMap.of(ResourceFieldType.DOMAIN, Collections.singletonList(domain.toString())))); + resourceFilter.setFilter(FilterUtils.newFilter(ImmutableMap.of(EntityFieldType.DOMAIN, Collections.singletonList(domain.toString())))); } dataHubPolicyInfo.setResources(resourceFilter); @@ -398,6 +398,6 @@ private Map createDomainPropertiesBatchResponse(@Nullable f } private AuthorizerContext createAuthorizerContext(final Authentication systemAuthentication, final EntityClient entityClient) { - return new AuthorizerContext(Collections.emptyMap(), new DefaultResourceSpecResolver(systemAuthentication, entityClient)); + return new AuthorizerContext(Collections.emptyMap(), new DefaultEntitySpecResolver(systemAuthentication, entityClient)); } } diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java index 99d8fee309d91..be8c948f8ef89 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java @@ -11,15 +11,12 @@ import com.linkedin.common.OwnershipType; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; -import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.StringArray; import com.linkedin.entity.Aspect; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.EntityClient; -import com.linkedin.identity.CorpUserInfo; -import com.linkedin.identity.GroupMembership; import com.linkedin.identity.RoleMembership; import com.linkedin.metadata.Constants; import com.linkedin.policy.DataHubActorFilter; @@ -45,22 +42,19 @@ public class PolicyEngineTest { private static final String AUTHORIZED_PRINCIPAL = "urn:li:corpuser:datahub"; 
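For reference, a minimal sketch of how the renamed pieces above fit together at a call site: the actor string from an AuthorizationRequest is bound to a Urn, wrapped in an EntitySpec, resolved to a ResolvedEntitySpec, and passed to PolicyEngine.evaluatePolicy. This is an illustrative sketch, not part of the patch; the inline resolution of the resource spec, the surrounding wiring (entitySpecResolver, policyEngine, policy), and the generic type parameters are assumptions inferred from the diff.

import com.datahub.authorization.EntitySpec;
import com.datahub.authorization.EntitySpecResolver;
import com.datahub.authorization.PolicyEngine;
import com.datahub.authorization.ResolvedEntitySpec;
import com.linkedin.common.urn.Urn;
import com.linkedin.policy.DataHubPolicyInfo;
import java.net.URISyntaxException;
import java.util.Optional;

class PolicyEvaluationFlowSketch {
  // Illustrative helper, not part of the patch: mirrors the flow in DataHubAuthorizer.isRequestGranted.
  boolean isGranted(EntitySpecResolver entitySpecResolver, PolicyEngine policyEngine,
      DataHubPolicyInfo policy, String actorUrnStr, String privilege,
      Optional<EntitySpec> resourceSpec) {
    final Urn actorUrn;
    try {
      // Actors must be URNs; a malformed actor denies the request up front.
      actorUrn = Urn.createFromString(actorUrnStr);
    } catch (URISyntaxException e) {
      return false;
    }
    // Resolve the actor into a ResolvedEntitySpec so group membership and other
    // fields are available to the policy engine without extra lookups.
    ResolvedEntitySpec resolvedActor =
        entitySpecResolver.resolve(new EntitySpec(actorUrn.getEntityType(), actorUrnStr));
    // Resolve the optional resource the same way, then evaluate the policy.
    Optional<ResolvedEntitySpec> resolvedResource = resourceSpec.map(entitySpecResolver::resolve);
    return policyEngine.evaluatePolicy(policy, resolvedActor, privilege, resolvedResource).isGranted();
  }
}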
private static final String UNAUTHORIZED_PRINCIPAL = "urn:li:corpuser:unauthorized"; - private static final String AUTHORIZED_GROUP = "urn:li:corpGroup:authorizedGroup"; - private static final String RESOURCE_URN = "urn:li:dataset:test"; - private static final String DOMAIN_URN = "urn:li:domain:domain1"; - private static final String OWNERSHIP_TYPE_URN = "urn:li:ownershipType:__system__technical_owner"; - private static final String OTHER_OWNERSHIP_TYPE_URN = "urn:li:ownershipType:__system__data_steward"; private EntityClient _entityClient; private PolicyEngine _policyEngine; private Urn authorizedUserUrn; + private ResolvedEntitySpec resolvedAuthorizedUserSpec; private Urn unauthorizedUserUrn; + private ResolvedEntitySpec resolvedUnauthorizedUserSpec; private Urn resourceUrn; @BeforeMethod @@ -68,29 +62,34 @@ public void setupTest() throws Exception { _entityClient = Mockito.mock(EntityClient.class); _policyEngine = new PolicyEngine(Mockito.mock(Authentication.class), _entityClient); - // Init mocks. - EntityResponse authorizedEntityResponse = createAuthorizedEntityResponse(); authorizedUserUrn = Urn.createFromString(AUTHORIZED_PRINCIPAL); + resolvedAuthorizedUserSpec = buildEntityResolvers(CORP_USER_ENTITY_NAME, AUTHORIZED_PRINCIPAL, + Collections.emptySet(), Collections.emptySet(), Collections.singleton(AUTHORIZED_GROUP)); + unauthorizedUserUrn = Urn.createFromString(UNAUTHORIZED_PRINCIPAL); + resolvedUnauthorizedUserSpec = buildEntityResolvers(CORP_USER_ENTITY_NAME, UNAUTHORIZED_PRINCIPAL); + resourceUrn = Urn.createFromString(RESOURCE_URN); + + // Init role membership mocks. + EntityResponse authorizedEntityResponse = createAuthorizedEntityResponse(); authorizedEntityResponse.setUrn(authorizedUserUrn); Map authorizedEntityResponseMap = Collections.singletonMap(authorizedUserUrn, authorizedEntityResponse); - when(_entityClient.batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), any(), - any())).thenReturn(authorizedEntityResponseMap); + when(_entityClient.batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), + eq(Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME)), any())).thenReturn(authorizedEntityResponseMap); EntityResponse unauthorizedEntityResponse = createUnauthorizedEntityResponse(); - unauthorizedUserUrn = Urn.createFromString(UNAUTHORIZED_PRINCIPAL); unauthorizedEntityResponse.setUrn(unauthorizedUserUrn); Map unauthorizedEntityResponseMap = Collections.singletonMap(unauthorizedUserUrn, unauthorizedEntityResponse); - when(_entityClient.batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(unauthorizedUserUrn)), any(), - any())).thenReturn(unauthorizedEntityResponseMap); + when(_entityClient.batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(unauthorizedUserUrn)), + eq(Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME)), any())).thenReturn(unauthorizedEntityResponseMap); + // Init ownership type mocks. 
EntityResponse entityResponse = new EntityResponse(); EnvelopedAspectMap envelopedAspectMap = new EnvelopedAspectMap(); envelopedAspectMap.put(OWNERSHIP_ASPECT_NAME, new EnvelopedAspect().setValue(new com.linkedin.entity.Aspect(createOwnershipAspect(true, true).data()))); entityResponse.setAspects(envelopedAspectMap); - resourceUrn = Urn.createFromString(RESOURCE_URN); Map mockMap = mock(Map.class); when(_entityClient.batchGetV2(any(), eq(Collections.singleton(resourceUrn)), eq(Collections.singleton(OWNERSHIP_ASPECT_NAME)), any())).thenReturn(mockMap); @@ -120,9 +119,9 @@ public void testEvaluatePolicyInactivePolicyState() { resourceFilter.setAllResources(true); resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -149,9 +148,9 @@ public void testEvaluatePolicyPrivilegeFilterNoMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_OWNERS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_OWNERS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -176,7 +175,8 @@ public void testEvaluatePlatformPolicyPrivilegeFilterMatch() throws Exception { dataHubPolicyInfo.setActors(actorFilter); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "MANAGE_POLICIES", Optional.empty()); + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "MANAGE_POLICIES", + Optional.empty()); assertTrue(result.isGranted()); // Verify no network calls @@ -208,10 +208,10 @@ public void testEvaluatePolicyActorFilterUserMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert Authorized user can edit entity tags. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); @@ -245,10 +245,10 @@ public void testEvaluatePolicyActorFilterUserNoMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert unauthorized user cannot edit entity tags. 
PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, "urn:li:corpuser:test", "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, buildEntityResolvers(CORP_USER_ENTITY_NAME, "urn:li:corpuser:test"), "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result2.isGranted()); @@ -270,7 +270,7 @@ public void testEvaluatePolicyActorFilterGroupMatch() throws Exception { final DataHubActorFilter actorFilter = new DataHubActorFilter(); final UrnArray groupsUrnArray = new UrnArray(); - groupsUrnArray.add(Urn.createFromString("urn:li:corpGroup:authorizedGroup")); + groupsUrnArray.add(Urn.createFromString(AUTHORIZED_GROUP)); actorFilter.setGroups(groupsUrnArray); actorFilter.setResourceOwners(false); actorFilter.setAllUsers(false); @@ -282,16 +282,15 @@ public void testEvaluatePolicyActorFilterGroupMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags, because of group membership. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); - // Verify we are only calling for group during these requests. - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -307,7 +306,7 @@ public void testEvaluatePolicyActorFilterGroupNoMatch() throws Exception { final DataHubActorFilter actorFilter = new DataHubActorFilter(); final UrnArray groupsUrnArray = new UrnArray(); - groupsUrnArray.add(Urn.createFromString("urn:li:corpGroup:authorizedGroup")); + groupsUrnArray.add(Urn.createFromString(AUTHORIZED_GROUP)); actorFilter.setGroups(groupsUrnArray); actorFilter.setResourceOwners(false); actorFilter.setAllUsers(false); @@ -319,16 +318,15 @@ public void testEvaluatePolicyActorFilterGroupNoMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert unauthorized user cannot edit entity tags. PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result2.isGranted()); - // Verify we are only calling for group during these requests. 
- verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), - eq(Collections.singleton(unauthorizedUserUrn)), any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -357,17 +355,17 @@ public void testEvaluatePolicyActorFilterRoleMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags. PolicyEngine.PolicyEvaluationResult authorizedResult = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(authorizedResult.isGranted()); // Verify we are only calling for roles during these requests. - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - any(), any()); + verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), + eq(Collections.singleton(authorizedUserUrn)), any(), any()); } @Test @@ -396,10 +394,10 @@ public void testEvaluatePolicyActorFilterNoRoleMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags. PolicyEngine.PolicyEvaluationResult unauthorizedResult = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(unauthorizedResult.isGranted()); @@ -431,16 +429,16 @@ public void testEvaluatePolicyActorFilterAllUsersMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags, because of group membership. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); // Assert unauthorized user cannot edit entity tags. PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result2.isGranted()); @@ -470,24 +468,21 @@ public void testEvaluatePolicyActorFilterAllGroupsMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags, because of group membership. 
PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); // Assert unauthorized user cannot edit entity tags. PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); - assertTrue(result2.isGranted()); + assertFalse(result2.isGranted()); - // Verify we are only calling for group during these requests. - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - any(), any()); - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), - eq(Collections.singleton(unauthorizedUserUrn)), any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -519,17 +514,17 @@ public void testEvaluatePolicyActorFilterUserResourceOwnersMatch() throws Except when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet(), + Collections.emptySet()); // Assert authorized user can edit entity tags, because he is a user owner. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); - // Ensure no calls for group membership. 
- verify(_entityClient, times(0)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - eq(null), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -562,13 +557,17 @@ public void testEvaluatePolicyActorFilterUserResourceOwnersTypeMatch() throws Ex when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet(), + Collections.emptySet()); PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); + + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -601,13 +600,16 @@ public void testEvaluatePolicyActorFilterUserResourceOwnersTypeNoMatch() throws when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet(), Collections.emptySet()); PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result1.isGranted()); + + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -639,17 +641,17 @@ public void testEvaluatePolicyActorFilterGroupResourceOwnersMatch() throws Excep when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_GROUP), Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_GROUP), Collections.emptySet(), + Collections.emptySet()); // Assert authorized user can edit entity tags, because he is a user owner. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); - // Ensure that caching of groups is working with 1 call to entity client for each principal. 
- verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -673,16 +675,15 @@ public void testEvaluatePolicyActorFilterGroupResourceOwnersNoMatch() throws Exc resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert unauthorized user cannot edit entity tags. PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result2.isGranted()); - // Ensure that caching of groups is working with 1 call to entity client for each principal. - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), - eq(Collections.singleton(unauthorizedUserUrn)), any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -706,10 +707,10 @@ public void testEvaluatePolicyResourceFilterAllResourcesMatch() throws Exception resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", "urn:li:dataset:random"); // A dataset Authorized principal _does not own_. + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", "urn:li:dataset:random"); // A dataset Authorized principal _does not own_. PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result.isGranted()); @@ -738,9 +739,9 @@ public void testEvaluatePolicyResourceFilterAllResourcesNoMatch() throws Excepti resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("chart", RESOURCE_URN); // Notice: Not a dataset. + ResolvedEntitySpec resourceSpec = buildEntityResolvers("chart", RESOURCE_URN); // Notice: Not a dataset. 
PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -773,9 +774,9 @@ public void testEvaluatePolicyResourceFilterSpecificResourceMatchLegacy() throws resourceFilter.setResources(resourceUrns); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result.isGranted()); @@ -801,13 +802,13 @@ public void testEvaluatePolicyResourceFilterSpecificResourceMatch() throws Excep final DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); resourceFilter.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), - ResourceFieldType.RESOURCE_URN, Collections.singletonList(RESOURCE_URN)))); + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), + EntityFieldType.URN, Collections.singletonList(RESOURCE_URN)))); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result.isGranted()); @@ -833,14 +834,14 @@ public void testEvaluatePolicyResourceFilterSpecificResourceNoMatch() throws Exc final DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); resourceFilter.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), - ResourceFieldType.RESOURCE_URN, Collections.singletonList(RESOURCE_URN)))); + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), + EntityFieldType.URN, Collections.singletonList(RESOURCE_URN)))); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy. + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy. 
PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -866,14 +867,14 @@ public void testEvaluatePolicyResourceFilterSpecificResourceMatchDomain() throws final DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); resourceFilter.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), ResourceFieldType.DOMAIN, + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), EntityFieldType.DOMAIN, Collections.singletonList(DOMAIN_URN)))); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, Collections.emptySet(), Collections.singleton(DOMAIN_URN)); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, Collections.emptySet(), Collections.singleton(DOMAIN_URN), Collections.emptySet()); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result.isGranted()); @@ -899,14 +900,14 @@ public void testEvaluatePolicyResourceFilterSpecificResourceNoMatchDomain() thro final DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); resourceFilter.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), ResourceFieldType.DOMAIN, + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), EntityFieldType.DOMAIN, Collections.singletonList(DOMAIN_URN)))); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN, Collections.emptySet(), - Collections.singleton("urn:li:domain:domain2")); // Domain doesn't match + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, Collections.emptySet(), + Collections.singleton("urn:li:domain:domain2"), Collections.emptySet()); // Domain doesn't match PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -933,7 +934,7 @@ public void testGetGrantedPrivileges() throws Exception { final DataHubResourceFilter resourceFilter1 = new DataHubResourceFilter(); resourceFilter1.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), ResourceFieldType.DOMAIN, + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), EntityFieldType.DOMAIN, Collections.singletonList(DOMAIN_URN)))); dataHubPolicyInfo1.setResources(resourceFilter1); @@ -954,8 +955,8 @@ public void testGetGrantedPrivileges() throws Exception { final DataHubResourceFilter resourceFilter2 = new DataHubResourceFilter(); resourceFilter2.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), - ResourceFieldType.RESOURCE_URN, Collections.singletonList(RESOURCE_URN)))); + 
ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), + EntityFieldType.URN, Collections.singletonList(RESOURCE_URN)))); dataHubPolicyInfo2.setResources(resourceFilter2); // Policy 3, match dataset type and owner (legacy resource filter) @@ -981,25 +982,25 @@ public void testGetGrantedPrivileges() throws Exception { final List policies = ImmutableList.of(dataHubPolicyInfo1, dataHubPolicyInfo2, dataHubPolicyInfo3); - assertEquals(_policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.empty()), + assertEquals(_policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.empty()), Collections.emptyList()); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN, Collections.emptySet(), - Collections.singleton(DOMAIN_URN)); // Everything matches + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, Collections.emptySet(), + Collections.singleton(DOMAIN_URN), Collections.emptySet()); // Everything matches assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), ImmutableList.of("PRIVILEGE_1", "PRIVILEGE_2_1", "PRIVILEGE_2_2")); - resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN, Collections.emptySet(), - Collections.singleton("urn:li:domain:domain2")); // Domain doesn't match + resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, Collections.emptySet(), + Collections.singleton("urn:li:domain:domain2"), Collections.emptySet()); // Domain doesn't match assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), ImmutableList.of("PRIVILEGE_2_1", "PRIVILEGE_2_2")); - resourceSpec = buildResourceResolvers("dataset", "urn:li:dataset:random", Collections.emptySet(), - Collections.singleton(DOMAIN_URN)); // Resource doesn't match + resourceSpec = buildEntityResolvers("dataset", "urn:li:dataset:random", Collections.emptySet(), + Collections.singleton(DOMAIN_URN), Collections.emptySet()); // Resource doesn't match assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), ImmutableList.of("PRIVILEGE_1")); final EntityResponse entityResponse = new EntityResponse(); @@ -1008,16 +1009,16 @@ public void testGetGrantedPrivileges() throws Exception { entityResponse.setAspects(aspectMap); when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN, Collections.singleton(AUTHORIZED_PRINCIPAL), - Collections.singleton(DOMAIN_URN)); // Is owner + resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, Collections.singleton(AUTHORIZED_PRINCIPAL), + Collections.singleton(DOMAIN_URN), Collections.emptySet()); // Is owner assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), ImmutableList.of("PRIVILEGE_1", 
"PRIVILEGE_2_1", "PRIVILEGE_2_2", "PRIVILEGE_3")); - resourceSpec = buildResourceResolvers("chart", RESOURCE_URN, Collections.singleton(AUTHORIZED_PRINCIPAL), - Collections.singleton(DOMAIN_URN)); // Resource type doesn't match + resourceSpec = buildEntityResolvers("chart", RESOURCE_URN, Collections.singleton(AUTHORIZED_PRINCIPAL), + Collections.singleton(DOMAIN_URN), Collections.emptySet()); // Resource type doesn't match assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), Collections.emptyList()); } @@ -1050,9 +1051,9 @@ public void testGetMatchingActorsResourceMatch() throws Exception { resourceFilter.setResources(resourceUrns); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL, AUTHORIZED_GROUP), - Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL, AUTHORIZED_GROUP), + Collections.emptySet(), Collections.emptySet()); PolicyEngine.PolicyActors actors = _policyEngine.getMatchingActors(dataHubPolicyInfo, Optional.of(resourceSpec)); assertTrue(actors.allUsers()); @@ -1101,8 +1102,8 @@ public void testGetMatchingActorsNoResourceMatch() throws Exception { resourceFilter.setResources(resourceUrns); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy. + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy. 
PolicyEngine.PolicyActors actors = _policyEngine.getMatchingActors(dataHubPolicyInfo, Optional.of(resourceSpec)); assertFalse(actors.allUsers()); @@ -1155,21 +1156,6 @@ private EntityResponse createAuthorizedEntityResponse() throws URISyntaxExceptio final EntityResponse entityResponse = new EntityResponse(); final EnvelopedAspectMap aspectMap = new EnvelopedAspectMap(); - final CorpUserInfo userInfo = new CorpUserInfo(); - userInfo.setActive(true); - userInfo.setFullName("Data Hub"); - userInfo.setFirstName("Data"); - userInfo.setLastName("Hub"); - userInfo.setEmail("datahub@gmail.com"); - userInfo.setTitle("Admin"); - aspectMap.put(CORP_USER_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(userInfo.data()))); - - final GroupMembership groupsAspect = new GroupMembership(); - final UrnArray groups = new UrnArray(); - groups.add(Urn.createFromString("urn:li:corpGroup:authorizedGroup")); - groupsAspect.setGroups(groups); - aspectMap.put(GROUP_MEMBERSHIP_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(groupsAspect.data()))); - final RoleMembership rolesAspect = new RoleMembership(); final UrnArray roles = new UrnArray(); roles.add(Urn.createFromString("urn:li:dataHubRole:admin")); @@ -1184,21 +1170,6 @@ private EntityResponse createUnauthorizedEntityResponse() throws URISyntaxExcept final EntityResponse entityResponse = new EntityResponse(); final EnvelopedAspectMap aspectMap = new EnvelopedAspectMap(); - final CorpUserInfo userInfo = new CorpUserInfo(); - userInfo.setActive(true); - userInfo.setFullName("Unauthorized User"); - userInfo.setFirstName("Unauthorized"); - userInfo.setLastName("User"); - userInfo.setEmail("Unauth"); - userInfo.setTitle("Engineer"); - aspectMap.put(CORP_USER_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(userInfo.data()))); - - final GroupMembership groupsAspect = new GroupMembership(); - final UrnArray groups = new UrnArray(); - groups.add(Urn.createFromString("urn:li:corpGroup:unauthorizedGroup")); - groupsAspect.setGroups(groups); - aspectMap.put(GROUP_MEMBERSHIP_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(groupsAspect.data()))); - final RoleMembership rolesAspect = new RoleMembership(); final UrnArray roles = new UrnArray(); roles.add(Urn.createFromString("urn:li:dataHubRole:reader")); @@ -1209,17 +1180,18 @@ private EntityResponse createUnauthorizedEntityResponse() throws URISyntaxExcept return entityResponse; } - public static ResolvedResourceSpec buildResourceResolvers(String entityType, String entityUrn) { - return buildResourceResolvers(entityType, entityUrn, Collections.emptySet(), Collections.emptySet()); + public static ResolvedEntitySpec buildEntityResolvers(String entityType, String entityUrn) { - return buildEntityResolvers(entityType, entityUrn, Collections.emptySet(), Collections.emptySet(), Collections.emptySet()); } - public static ResolvedResourceSpec buildResourceResolvers(String entityType, String entityUrn, Set<String> owners, - Set<String> domains) { - return new ResolvedResourceSpec(new ResourceSpec(entityType, entityUrn), - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, - FieldResolver.getResolverFromValues(Collections.singleton(entityType)), ResourceFieldType.RESOURCE_URN, - FieldResolver.getResolverFromValues(Collections.singleton(entityUrn)), ResourceFieldType.OWNER, - FieldResolver.getResolverFromValues(owners), ResourceFieldType.DOMAIN, - FieldResolver.getResolverFromValues(domains))); + public static ResolvedEntitySpec buildEntityResolvers(String entityType, String entityUrn, Set<String> owners, + Set<String> domains, Set<String>
groups) { + return new ResolvedEntitySpec(new EntitySpec(entityType, entityUrn), + ImmutableMap.of(EntityFieldType.TYPE, + FieldResolver.getResolverFromValues(Collections.singleton(entityType)), EntityFieldType.URN, + FieldResolver.getResolverFromValues(Collections.singleton(entityUrn)), EntityFieldType.OWNER, + FieldResolver.getResolverFromValues(owners), EntityFieldType.DOMAIN, + FieldResolver.getResolverFromValues(domains), EntityFieldType.GROUP_MEMBERSHIP, + FieldResolver.getResolverFromValues(groups))); } } diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java index e525c602c2620..b2343bbb01509 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java @@ -1,8 +1,21 @@ package com.datahub.authorization.fieldresolverprovider; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyZeroInteractions; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + import com.datahub.authentication.Authentication; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.DataPlatformInstance; import com.linkedin.common.urn.Urn; import com.linkedin.entity.Aspect; @@ -11,29 +24,21 @@ import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.EntityClient; import com.linkedin.r2.RemoteInvocationException; +import java.net.URISyntaxException; +import java.util.Collections; +import java.util.Set; import org.mockito.Mock; import org.mockito.MockitoAnnotations; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import java.net.URISyntaxException; -import java.util.Collections; -import java.util.Set; - -import static com.linkedin.metadata.Constants.*; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.*; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - public class DataPlatformInstanceFieldResolverProviderTest { private static final String DATA_PLATFORM_INSTANCE_URN = "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)"; private static final String RESOURCE_URN = "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.testDataset,PROD)"; - private static final ResourceSpec RESOURCE_SPEC = new ResourceSpec(DATASET_ENTITY_NAME, RESOURCE_URN); + private static final EntitySpec RESOURCE_SPEC = new EntitySpec(DATASET_ENTITY_NAME, RESOURCE_URN); @Mock private 
EntityClient entityClientMock; @@ -51,12 +56,12 @@ public void setup() { @Test public void shouldReturnDataPlatformInstanceType() { - assertEquals(ResourceFieldType.DATA_PLATFORM_INSTANCE, dataPlatformInstanceFieldResolverProvider.getFieldType()); + assertEquals(EntityFieldType.DATA_PLATFORM_INSTANCE, dataPlatformInstanceFieldResolverProvider.getFieldType()); } @Test public void shouldReturnFieldValueWithResourceSpecIfTypeIsDataPlatformInstance() { - var resourceSpec = new ResourceSpec(DATA_PLATFORM_INSTANCE_ENTITY_NAME, DATA_PLATFORM_INSTANCE_URN); + var resourceSpec = new EntitySpec(DATA_PLATFORM_INSTANCE_ENTITY_NAME, DATA_PLATFORM_INSTANCE_URN); var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(resourceSpec); diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java new file mode 100644 index 0000000000000..54675045b4413 --- /dev/null +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java @@ -0,0 +1,212 @@ +package com.datahub.authorization.fieldresolverprovider; + +import com.datahub.authentication.Authentication; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.UrnArray; +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.identity.GroupMembership; +import com.linkedin.identity.NativeGroupMembership; +import com.linkedin.r2.RemoteInvocationException; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.net.URISyntaxException; +import java.util.Set; + +import static com.linkedin.metadata.Constants.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +public class GroupMembershipFieldResolverProviderTest { + + private static final String CORPGROUP_URN = "urn:li:corpGroup:groupname"; + private static final String NATIVE_CORPGROUP_URN = "urn:li:corpGroup:nativegroupname"; + private static final String RESOURCE_URN = "urn:li:dataset:(urn:li:dataPlatform:testPlatform,testDataset,PROD)"; + private static final EntitySpec RESOURCE_SPEC = new EntitySpec(DATASET_ENTITY_NAME, RESOURCE_URN); + + @Mock + private EntityClient entityClientMock; + @Mock + private Authentication systemAuthenticationMock; + + private GroupMembershipFieldResolverProvider groupMembershipFieldResolverProvider; + + @BeforeMethod + public void setup() { + MockitoAnnotations.initMocks(this); + groupMembershipFieldResolverProvider = + new GroupMembershipFieldResolverProvider(entityClientMock, systemAuthenticationMock); + } + + @Test + public void shouldReturnGroupsMembershipType() { + assertEquals(EntityFieldType.GROUP_MEMBERSHIP, groupMembershipFieldResolverProvider.getFieldType()); + } + + @Test + public void 
shouldReturnEmptyFieldValueWhenResponseIsNull() throws RemoteInvocationException, URISyntaxException { + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(null); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenResourceDoesNotBelongToAnyGroup() + throws RemoteInvocationException, URISyntaxException { + var entityResponseMock = mock(EntityResponse.class); + when(entityResponseMock.getAspects()).thenReturn(new EnvelopedAspectMap()); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenThereIsAnException() throws RemoteInvocationException, URISyntaxException { + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenThrow(new RemoteInvocationException()); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnFieldValueWithOnlyGroupsOfTheResource() + throws RemoteInvocationException, URISyntaxException { + + var groupMembership = new GroupMembership().setGroups( + new UrnArray(ImmutableList.of(Urn.createFromString(CORPGROUP_URN)))); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(GROUP_MEMBERSHIP_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(groupMembership.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertEquals(Set.of(CORPGROUP_URN), result.getFieldValuesFuture().join().getValues()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + 
); + } + + @Test + public void shouldReturnFieldValueWithOnlyNativeGroupsOfTheResource() + throws RemoteInvocationException, URISyntaxException { + + var nativeGroupMembership = new NativeGroupMembership().setNativeGroups( + new UrnArray(ImmutableList.of(Urn.createFromString(NATIVE_CORPGROUP_URN)))); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(nativeGroupMembership.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertEquals(Set.of(NATIVE_CORPGROUP_URN), result.getFieldValuesFuture().join().getValues()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnFieldValueWithGroupsAndNativeGroupsOfTheResource() + throws RemoteInvocationException, URISyntaxException { + + var groupMembership = new GroupMembership().setGroups( + new UrnArray(ImmutableList.of(Urn.createFromString(CORPGROUP_URN)))); + var nativeGroupMembership = new NativeGroupMembership().setNativeGroups( + new UrnArray(ImmutableList.of(Urn.createFromString(NATIVE_CORPGROUP_URN)))); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(GROUP_MEMBERSHIP_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(groupMembership.data()))); + envelopedAspectMap.put(NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(nativeGroupMembership.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertEquals(Set.of(CORPGROUP_URN, NATIVE_CORPGROUP_URN), result.getFieldValuesFuture().join().getValues()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } +} \ No newline at end of file diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java index bf50a0c7b6473..b90257870a8b2 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java @@ -2,12 +2,12 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.authorization.DataHubAuthorizer; -import com.datahub.authorization.DefaultResourceSpecResolver; +import com.datahub.authorization.DefaultEntitySpecResolver; import 
com.datahub.plugins.PluginConstant; import com.datahub.authentication.Authentication; import com.datahub.plugins.auth.authorization.Authorizer; import com.datahub.authorization.AuthorizerContext; -import com.datahub.authorization.ResourceSpecResolver; +import com.datahub.authorization.EntitySpecResolver; import com.datahub.plugins.common.PluginConfig; import com.datahub.plugins.common.PluginPermissionManager; import com.datahub.plugins.common.PluginType; @@ -64,7 +64,7 @@ public class AuthorizerChainFactory { @Scope("singleton") @Nonnull protected AuthorizerChain getInstance() { - final ResourceSpecResolver resolver = initResolver(); + final EntitySpecResolver resolver = initResolver(); // Extract + initialize customer authorizers from application configs. final List authorizers = new ArrayList<>(initCustomAuthorizers(resolver)); @@ -79,11 +79,11 @@ protected AuthorizerChain getInstance() { return new AuthorizerChain(authorizers, dataHubAuthorizer); } - private ResourceSpecResolver initResolver() { - return new DefaultResourceSpecResolver(systemAuthentication, entityClient); + private EntitySpecResolver initResolver() { + return new DefaultEntitySpecResolver(systemAuthentication, entityClient); } - private List initCustomAuthorizers(ResourceSpecResolver resolver) { + private List initCustomAuthorizers(EntitySpecResolver resolver) { final List customAuthorizers = new ArrayList<>(); Path pluginBaseDirectory = Paths.get(configurationProvider.getDatahub().getPlugin().getAuth().getPath()); @@ -99,7 +99,7 @@ private List initCustomAuthorizers(ResourceSpecResolver resolver) { return customAuthorizers; } - private void registerAuthorizer(List customAuthorizers, ResourceSpecResolver resolver, Config config) { + private void registerAuthorizer(List customAuthorizers, EntitySpecResolver resolver, Config config) { PluginConfigFactory authorizerPluginPluginConfigFactory = new PluginConfigFactory(config); // Load only Authorizer configuration from plugin config factory List authorizers = diff --git a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java index ade49c876f168..207c2284e2673 100644 --- a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java +++ b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java @@ -45,8 +45,7 @@ import io.datahubproject.openapi.util.OpenApiEntitiesUtil; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.linkedin.metadata.models.EntitySpec; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.metadata.authorization.PoliciesConfig; import com.google.common.collect.ImmutableList; import com.datahub.authorization.AuthUtil; @@ -377,7 +376,7 @@ public ResponseEntity scroll(@Valid Boolean systemMetadata, @Valid List sort, @Valid SortOrder sortOrder, @Valid String query) { Authentication authentication = AuthenticationContext.getAuthentication(); - EntitySpec entitySpec = OpenApiEntitiesUtil.responseClassToEntitySpec(_entityRegistry, _respClazz); + com.linkedin.metadata.models.EntitySpec entitySpec = OpenApiEntitiesUtil.responseClassToEntitySpec(_entityRegistry, _respClazz); checkScrollAuthorized(authentication, entitySpec); // TODO 
multi-field sort @@ -410,12 +409,12 @@ public ResponseEntity scroll(@Valid Boolean systemMetadata, @Valid List> resourceSpecs = List.of(Optional.of(new ResourceSpec(entitySpec.getName(), ""))); + List> resourceSpecs = List.of(Optional.of(new EntitySpec(entitySpec.getName(), ""))); if (_restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizationChain, actorUrnStr, resourceSpecs, orGroup)) { throw new UnauthorizedException(actorUrnStr + " is unauthorized to get entities."); } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java index 6439e2f31f7b0..898f768cf999a 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java @@ -8,7 +8,7 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -93,8 +93,8 @@ public ResponseEntity getEntities( ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE.getType()) ))); - List> resourceSpecs = entityUrns.stream() - .map(urn -> Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + List> resourceSpecs = entityUrns.stream() + .map(urn -> Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizerChain, actorUrnStr, resourceSpecs, orGroup)) { throw new UnauthorizedException(actorUrnStr + " is unauthorized to get entities."); @@ -175,8 +175,8 @@ public ResponseEntity> deleteEntities( .map(URLDecoder::decode) .map(UrnUtils::getUrn).collect(Collectors.toSet()); - List> resourceSpecs = entityUrns.stream() - .map(urn -> Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + List> resourceSpecs = entityUrns.stream() + .map(urn -> Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizerChain, actorUrnStr, resourceSpecs, orGroup)) { UnauthorizedException unauthorizedException = new UnauthorizedException(actorUrnStr + " is unauthorized to delete entities."); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/relationships/RelationshipsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/relationships/RelationshipsController.java index 1e37170f37b3b..4641fed3a8610 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/relationships/RelationshipsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/relationships/RelationshipsController.java @@ -8,7 +8,7 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import 
com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -131,8 +131,8 @@ public ResponseEntity getRelationships( // Re-using GET_ENTITY_PRIVILEGE here as it doesn't make sense to split the privileges between these APIs. ))); - List> resourceSpecs = - Collections.singletonList(Optional.of(new ResourceSpec(entityUrn.getEntityType(), entityUrn.toString()))); + List> resourceSpecs = + Collections.singletonList(Optional.of(new EntitySpec(entityUrn.getEntityType(), entityUrn.toString()))); if (restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizerChain, actorUrnStr, resourceSpecs, orGroup)) { throw new UnauthorizedException(actorUrnStr + " is unauthorized to get relationships."); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/timeline/TimelineController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/timeline/TimelineController.java index 5a0ce2e314e1b..fbde9e8072002 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/timeline/TimelineController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/timeline/TimelineController.java @@ -6,7 +6,7 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.fasterxml.jackson.core.JsonProcessingException; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -67,7 +67,7 @@ public ResponseEntity> getTimeline( Urn urn = Urn.createFromString(rawUrn); Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); - ResourceSpec resourceSpec = new ResourceSpec(urn.getEntityType(), rawUrn); + EntitySpec resourceSpec = new EntitySpec(urn.getEntityType(), rawUrn); DisjunctivePrivilegeGroup orGroup = new DisjunctivePrivilegeGroup( ImmutableList.of(new ConjunctivePrivilegeGroup(ImmutableList.of(PoliciesConfig.GET_TIMELINE_PRIVILEGE.getType())))); if (restApiAuthorizationEnabled && !AuthUtil.isAuthorized(_authorizerChain, actorUrnStr, Optional.of(resourceSpec), orGroup)) { diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java index 2b3e84e2df20f..21dc5a4c8a0d6 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java @@ -5,7 +5,7 @@ import com.datahub.authorization.AuthUtil; import com.datahub.plugins.auth.authorization.Authorizer; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -27,7 +27,6 @@ import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; import com.linkedin.metadata.entity.transactions.AspectsBatch; import com.linkedin.metadata.entity.validation.ValidationException; -import com.linkedin.metadata.models.EntitySpec; import 
com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; @@ -378,11 +377,11 @@ public static GenericAspect convertGenericAspect(@Nonnull io.datahubproject.open public static boolean authorizeProposals(List proposals, EntityService entityService, Authorizer authorizer, String actorUrnStr, DisjunctivePrivilegeGroup orGroup) { - List<Optional<ResourceSpec>> resourceSpecs = proposals.stream() + List<Optional<EntitySpec>> resourceSpecs = proposals.stream() .map(proposal -> { - EntitySpec entitySpec = entityService.getEntityRegistry().getEntitySpec(proposal.getEntityType()); + com.linkedin.metadata.models.EntitySpec entitySpec = entityService.getEntityRegistry().getEntitySpec(proposal.getEntityType()); Urn entityUrn = EntityKeyUtils.getUrnFromProposal(proposal, entitySpec.getKeyAspectSpec()); - return Optional.of(new ResourceSpec(proposal.getEntityType(), entityUrn.toString())); + return Optional.of(new EntitySpec(proposal.getEntityType(), entityUrn.toString())); }) .collect(Collectors.toList()); return AuthUtil.isAuthorizedForResources(authorizer, actorUrnStr, resourceSpecs, orGroup); @@ -513,7 +512,7 @@ public static RollbackRunResultDto mapRollbackRunResult(RollbackRunResult rollba } public static UpsertAspectRequest createStatusRemoval(Urn urn, EntityService entityService) { - EntitySpec entitySpec = entityService.getEntityRegistry().getEntitySpec(urn.getEntityType()); + com.linkedin.metadata.models.EntitySpec entitySpec = entityService.getEntityRegistry().getEntitySpec(urn.getEntityType()); if (entitySpec == null || !entitySpec.getAspectSpecMap().containsKey(STATUS_ASPECT_NAME)) { throw new IllegalArgumentException("Entity type is not valid for soft deletes: " + urn.getEntityType()); } diff --git a/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java index b6bc282f10b65..442ac1b0d287b 100644 --- a/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java +++ b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java @@ -4,7 +4,7 @@ import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.AuthorizedActors; import com.datahub.authorization.AuthorizerContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.PluginConstant; import com.datahub.plugins.auth.authorization.Authorizer; import java.io.BufferedReader; @@ -74,7 +74,7 @@ public AuthorizationResult authorize(@Nonnull AuthorizationRequest request) { } @Override - public AuthorizedActors authorizedActors(String privilege, Optional<ResourceSpec> resourceSpec) { + public AuthorizedActors authorizedActors(String privilege, Optional<EntitySpec> resourceSpec) { return new AuthorizedActors("ALL", null, null, true, true); } } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 936c8bb67e645..af76af90ce77f 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -3,7 +3,7 @@ import
com.codahale.metrics.MetricRegistry; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; @@ -20,7 +20,6 @@ import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.validation.ValidationException; -import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.restli.RestliUtil; @@ -123,7 +122,7 @@ public Task get(@Nonnull String urnStr, @QueryParam("aspect") @Option Authentication authentication = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get aspect for " + urn); } final VersionedAspect aspect = _entityService.getVersionedAspect(urn, aspectName, version); @@ -154,7 +153,7 @@ public Task getTimeseriesAspectValues( Authentication authentication = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.GET_TIMESERIES_ASPECT_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get timeseries aspect for " + urn); } GetTimeseriesAspectValuesResponse response = new GetTimeseriesAspectValuesResponse(); @@ -193,11 +192,11 @@ public Task ingestProposal( } Authentication authentication = AuthenticationContext.getAuthentication(); - EntitySpec entitySpec = _entityService.getEntityRegistry().getEntitySpec(metadataChangeProposal.getEntityType()); + com.linkedin.metadata.models.EntitySpec entitySpec = _entityService.getEntityRegistry().getEntitySpec(metadataChangeProposal.getEntityType()); Urn urn = EntityKeyUtils.getUrnFromProposal(metadataChangeProposal, entitySpec.getKeyAspectSpec()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to modify entity " + urn); } String actorUrnStr = authentication.getActor().toUrnStr(); @@ -249,7 +248,7 @@ public Task getCount(@ActionParam(PARAM_ASPECT) @Nonnull String aspectN Authentication authentication = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), - (ResourceSpec) null)) { + (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to 
get aspect counts."); } return _entityService.getCountAspect(aspectName, urnLike); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java index 3ff22fb767676..9bab846d1bdcc 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -123,9 +123,9 @@ public Task rollback(@ActionParam("runId") @Nonnull String run List aspectRowsToDelete; aspectRowsToDelete = _systemMetadataService.findByRunId(runId, doHardDelete, 0, ESUtils.MAX_RESULT_SIZE); Set urns = aspectRowsToDelete.stream().collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet(); - List> resourceSpecs = urns.stream() + List> resourceSpecs = urns.stream() .map(UrnUtils::getUrn) - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index f6dedfb9a07c6..3ee98b3244718 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -3,7 +3,7 @@ import com.codahale.metrics.MetricRegistry; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; @@ -173,7 +173,7 @@ public Task get(@Nonnull String urnStr, final Urn urn = Urn.createFromString(urnStr); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), new ResourceSpec(urn.getEntityType(), urnStr))) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), new EntitySpec(urn.getEntityType(), urnStr))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity " + urn); } @@ -198,8 +198,8 @@ public Task> batchGet(@Nonnull Set urnStrs, for (final String urnStr : urnStrs) { urns.add(Urn.createFromString(urnStr)); } - List> resourceSpecs = urns.stream() - 
.map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + List> resourceSpecs = urns.stream() + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) @@ -242,7 +242,7 @@ public Task ingest(@ActionParam(PARAM_ENTITY) @Nonnull Entity entity, final Urn urn = com.datahub.util.ModelUtils.getUrnFromSnapshotUnion(entity.getValue()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to edit entity " + urn); } @@ -273,10 +273,10 @@ public Task batchIngest(@ActionParam(PARAM_ENTITIES) @Nonnull Entity[] ent Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); - List> resourceSpecs = Arrays.stream(entities) + List> resourceSpecs = Arrays.stream(entities) .map(Entity::getValue) .map(com.datahub.util.ModelUtils::getUrnFromSnapshotUnion) - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), resourceSpecs)) { @@ -322,7 +322,7 @@ public Task search(@ActionParam(PARAM_ENTITY) @Nonnull String enti @Optional @Nullable @ActionParam(PARAM_SEARCH_FLAGS) SearchFlags searchFlags) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -347,7 +347,7 @@ public Task searchAcrossEntities(@ActionParam(PARAM_ENTITIES) @Opt @ActionParam(PARAM_COUNT) int count, @ActionParam(PARAM_SEARCH_FLAGS) @Optional SearchFlags searchFlags) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -391,7 +391,7 @@ public Task searchAcrossLineage(@ActionParam(PARAM_URN) @No @Optional @Nullable @ActionParam(PARAM_SEARCH_FLAGS) SearchFlags searchFlags) throws URISyntaxException { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), (ResourceSpec) null)) { + && 
!isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -443,7 +443,7 @@ public Task list(@ActionParam(PARAM_ENTITY) @Nonnull String entityNa Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -462,7 +462,7 @@ public Task autocomplete(@ActionParam(PARAM_ENTITY) @Nonnull Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -479,7 +479,7 @@ public Task browse(@ActionParam(PARAM_ENTITY) @Nonnull String enti Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -497,7 +497,7 @@ public Task getBrowsePaths( Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity: " + urn); } @@ -546,9 +546,9 @@ public Task deleteEntities(@ActionParam("registryId") @Optiona log.info("found {} rows to delete...", stringifyRowCount(aspectRowsToDelete.size())); response.setAspectsAffected(aspectRowsToDelete.size()); Set urns = aspectRowsToDelete.stream().collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet(); - List> resourceSpecs = urns.stream() + List> resourceSpecs = urns.stream() .map(UrnUtils::getUrn) - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) @@ -590,7 +590,7 @@ public Task deleteEntity(@ActionParam(PARAM_URN) @Nonnull Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.DELETE_ENTITY_PRIVILEGE), - Collections.singletonList(java.util.Optional.of(new 
ResourceSpec(urn.getEntityType(), urn.toString()))))) { + Collections.singletonList(java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to delete entity: " + urnStr); } @@ -638,7 +638,7 @@ private Long deleteTimeseriesAspects(@Nonnull Urn urn, @Nullable Long startTimeM Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.DELETE_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to delete entity " + urn); } @@ -678,7 +678,7 @@ public Task deleteReferencesTo(@ActionParam(PARAM_URN) Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.DELETE_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urnStr))) { + new EntitySpec(urn.getEntityType(), urnStr))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to delete entity " + urnStr); } @@ -695,7 +695,7 @@ public Task deleteReferencesTo(@ActionParam(PARAM_URN) public Task setWriteable(@ActionParam(PARAM_VALUE) @Optional("true") @Nonnull Boolean value) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SET_WRITEABLE_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SET_WRITEABLE_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to enable and disable write mode."); } @@ -712,7 +712,7 @@ public Task setWriteable(@ActionParam(PARAM_VALUE) @Optional("true") @Nonn public Task getTotalEntityCount(@ActionParam(PARAM_ENTITY) @Nonnull String entityName) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity counts."); } @@ -725,7 +725,7 @@ public Task getTotalEntityCount(@ActionParam(PARAM_ENTITY) @Nonnull String public Task batchGetTotalEntityCount(@ActionParam(PARAM_ENTITIES) @Nonnull String[] entityNames) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity counts."); } @@ -739,7 +739,7 @@ public Task listUrns(@ActionParam(PARAM_ENTITY) @Nonnull String @ActionParam(PARAM_START) int start, @ActionParam(PARAM_COUNT) 
int count) throws URISyntaxException { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -757,10 +757,10 @@ public Task applyRetention(@ActionParam(PARAM_START) @Optional @Nullable @ActionParam(PARAM_URN) @Optional @Nullable String urn ) { Authentication auth = AuthenticationContext.getAuthentication(); - ResourceSpec resourceSpec = null; + EntitySpec resourceSpec = null; if (StringUtils.isNotBlank(urn)) { Urn resource = UrnUtils.getUrn(urn); - resourceSpec = new ResourceSpec(resource.getEntityType(), resource.toString()); + resourceSpec = new EntitySpec(resource.getEntityType(), resource.toString()); } if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.APPLY_RETENTION_PRIVILEGE), resourceSpec)) { @@ -781,7 +781,7 @@ public Task filter(@ActionParam(PARAM_ENTITY) @Nonnull String enti Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -799,7 +799,7 @@ public Task exists(@ActionParam(PARAM_URN) @Nonnull String urnStr) thro Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urnStr))) { + new EntitySpec(urn.getEntityType(), urnStr))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized get entity: " + urnStr); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java index 7efb93c0f50e6..0c3e93273b863 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.entity.EntityResponse; @@ -68,7 +68,7 @@ public Task get(@Nonnull String urnStr, final Urn urn = Urn.createFromString(urnStr); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, 
ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), new ResourceSpec(urn.getEntityType(), urnStr))) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), new EntitySpec(urn.getEntityType(), urnStr))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity " + urn); } @@ -96,8 +96,8 @@ public Task> batchGet(@Nonnull Set urnStrs, urns.add(Urn.createFromString(urnStr)); } Authentication auth = AuthenticationContext.getAuthentication(); - List> resourceSpecs = urns.stream() - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + List> resourceSpecs = urns.stream() + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), resourceSpecs)) { diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java index fd5c3507b5408..05b7e6b3ff24b 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.VersionedUrn; import com.linkedin.common.urn.Urn; @@ -65,9 +65,9 @@ public Task> batchGetVersioned( @QueryParam(PARAM_ENTITY_TYPE) @Nonnull String entityType, @QueryParam(PARAM_ASPECTS) @Optional @Nullable String[] aspectNames) { Authentication auth = AuthenticationContext.getAuthentication(); - List> resourceSpecs = versionedUrnStrs.stream() + List> resourceSpecs = versionedUrnStrs.stream() .map(versionedUrn -> UrnUtils.getUrn(versionedUrn.getUrn())) - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), resourceSpecs)) { diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java index 313d16333f9e9..4a8e74c89039a 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import 
com.google.common.collect.ImmutableList; import com.linkedin.common.EntityRelationship; import com.linkedin.common.EntityRelationshipArray; @@ -107,7 +107,7 @@ public Task get(@QueryParam("urn") @Nonnull String rawUrn, Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - Collections.singletonList(java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))))) { + Collections.singletonList(java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity lineage: " + rawUrn); } @@ -142,7 +142,7 @@ public UpdateResponse delete(@QueryParam("urn") @Nonnull String rawUrn) throws E Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.DELETE_ENTITY_PRIVILEGE), - Collections.singletonList(java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))))) { + Collections.singletonList(java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to delete entity: " + rawUrn); } @@ -162,7 +162,7 @@ public Task getLineage(@ActionParam(PARAM_URN) @Nonnull Str Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - Collections.singletonList(java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))))) { + Collections.singletonList(java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity lineage: " + urnStr); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java index 188e5ae18ee8f..12586b66495a9 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java @@ -2,7 +2,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -37,10 +37,10 @@ public static String restoreIndices( @Nonnull EntityService entityService ) { Authentication authentication = AuthenticationContext.getAuthentication(); - ResourceSpec resourceSpec = null; + EntitySpec resourceSpec = null; if (StringUtils.isNotBlank(urn)) { Urn resource = UrnUtils.getUrn(urn); - resourceSpec = new ResourceSpec(resource.getEntityType(), resource.toString()); + resourceSpec = new EntitySpec(resource.getEntityType(), resource.toString()); } if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && 
!isAuthorized(authentication, authorizer, ImmutableList.of(PoliciesConfig.RESTORE_INDICES_PRIVILEGE), diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java index f36841bb4abae..a8018074497c4 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java @@ -3,7 +3,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.entity.Entity; import com.linkedin.metadata.authorization.PoliciesConfig; @@ -54,7 +54,7 @@ public Task producePlatformEvent( @ActionParam("event") @Nonnull PlatformEvent event) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.PRODUCE_PLATFORM_EVENT_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.PRODUCE_PLATFORM_EVENT_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to produce platform events."); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java index 5c3b90a84aec1..9949556c99b81 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java @@ -4,7 +4,7 @@ import com.datahub.authorization.AuthUtil; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.metadata.authorization.PoliciesConfig; @@ -82,13 +82,13 @@ public static RestLiServiceException invalidArgumentsException(@Nullable String } public static boolean isAuthorized(@Nonnull Authentication authentication, @Nonnull Authorizer authorizer, - @Nonnull final List privileges, @Nonnull final List> resources) { + @Nonnull final List privileges, @Nonnull final List> resources) { DisjunctivePrivilegeGroup orGroup = convertPrivilegeGroup(privileges); return AuthUtil.isAuthorizedForResources(authorizer, authentication.getActor().toUrnStr(), resources, orGroup); } public static boolean isAuthorized(@Nonnull Authentication authentication, @Nonnull Authorizer authorizer, - @Nonnull final List privileges, @Nullable final ResourceSpec resource) { + @Nonnull final List privileges, @Nullable final EntitySpec resource) { DisjunctivePrivilegeGroup orGroup = convertPrivilegeGroup(privileges); return AuthUtil.isAuthorized(authorizer, authentication.getActor().toUrnStr(), 
java.util.Optional.ofNullable(resource), orGroup); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java index be70cf9c494ef..02d413301f3b4 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.JsonNode; @@ -125,7 +125,7 @@ public Task batchIngest(@ActionParam(PARAM_BUCKETS) @Nonnull UsageAggregat return RestliUtil.toTask(() -> { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to edit entities."); } @@ -323,7 +323,7 @@ public Task query(@ActionParam(PARAM_RESOURCE) @Nonnull String Urn resourceUrn = UrnUtils.getUrn(resource); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE), - new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { + new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to query usage."); } @@ -383,7 +383,7 @@ public Task queryRange(@ActionParam(PARAM_RESOURCE) @Nonnull S Urn resourceUrn = UrnUtils.getUrn(resource); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE), - new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { + new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to query usage."); } From d04d25bf428aa442b08a4011fcac81b3d1526a86 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Thu, 12 Oct 2023 15:50:20 -0400 Subject: [PATCH 42/98] smoke test(): Query plus filter search test (#8993) --- .../e2e/search/query_and_filter_search.js | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js diff --git a/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js b/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js new file mode 100644 index 0000000000000..4637310b86496 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js @@ -0,0 +1,57 @@ +describe("auto-complete dropdown, filter plus query search test", () => { + + 
const platformQuerySearch = (query,test_id,active_filter) => { + cy.visit("/"); + cy.get("input[data-testid=search-input]").type(query); + cy.get(`[data-testid="quick-filter-urn:li:dataPlatform:${test_id}"]`).click(); + cy.focused().type("{enter}").wait(3000); + cy.url().should( + "include", + `?filter_platform___false___EQUAL___0=urn%3Ali%3AdataPlatform%3A${test_id}` + ); + cy.get('[data-testid="search-input"]').should("have.value", query); + cy.get(`[data-testid="active-filter-${active_filter}"]`).should("be.visible"); + cy.contains("of 0 results").should("not.exist"); + cy.contains(/of [0-9]+ results/); + } + + const entityQuerySearch = (query,test_id,active_filter) => { + cy.visit("/"); + cy.get("input[data-testid=search-input]").type(query); + cy.get(`[data-testid="quick-filter-${test_id}"]`).click(); + cy.focused().type("{enter}").wait(3000); + cy.url().should( + "include", + `?filter__entityType___false___EQUAL___0=${test_id}` + ); + cy.get('[data-testid="search-input"]').should("have.value", query); + cy.get(`[data-testid="active-filter-${active_filter}"]`).should("be.visible"); + cy.contains("of 0 results").should("not.exist"); + cy.contains(/of [0-9]+ results/); + } + + it("verify the 'filter by' section + query (result in search page with query applied + filter applied)", () => { + // Platform query plus filter test + cy.loginWithCredentials(); + // Airflow + platformQuerySearch ("cypress","airflow","Airflow"); + // BigQuery + platformQuerySearch ("cypress","bigquery","BigQuery"); + // dbt + platformQuerySearch ("cypress","dbt","dbt"); + // Hive + platformQuerySearch ("cypress","hive","Hive"); + + // Entity type query plus filter test + // Datasets + entityQuerySearch ("cypress","DATASET","Datasets"); + // Dashboards + entityQuerySearch ("cypress","DASHBOARD","Dashboards"); + // Pipelines + entityQuerySearch ("cypress","DATA_FLOW","Pipelines"); + // Domains + entityQuerySearch ("Marketing","DOMAIN","Domains"); + // Glossary Terms + entityQuerySearch ("cypress","GLOSSARY_TERM","Glossary Terms"); + }); +}); \ No newline at end of file From a8f0080c08b5c816f0dae9d3bef07ea00220541e Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Fri, 13 Oct 2023 00:14:45 +0200 Subject: [PATCH 43/98] feat(ingest/teradata): Teradata source (#8977) --- .../docs/sources/teradata/teradata_pre.md | 28 +++ .../docs/sources/teradata/teradata_recipe.yml | 17 ++ metadata-ingestion/setup.py | 3 + .../datahub/ingestion/source/sql/teradata.py | 228 ++++++++++++++++++ .../testing/check_sql_parser_result.py | 5 +- .../src/datahub/utilities/sqlglot_lineage.py | 5 + .../test_teradata_default_normalization.json | 38 +++ .../unit/sql_parsing/test_sqlglot_lineage.py | 42 ++++ 8 files changed, 365 insertions(+), 1 deletion(-) create mode 100644 metadata-ingestion/docs/sources/teradata/teradata_pre.md create mode 100644 metadata-ingestion/docs/sources/teradata/teradata_recipe.yml create mode 100644 metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json diff --git a/metadata-ingestion/docs/sources/teradata/teradata_pre.md b/metadata-ingestion/docs/sources/teradata/teradata_pre.md new file mode 100644 index 0000000000000..eb59caa29eb52 --- /dev/null +++ b/metadata-ingestion/docs/sources/teradata/teradata_pre.md @@ -0,0 +1,28 @@ +### Prerequisites +1. Create a user which has access to the database you want to ingest. + ```sql + CREATE USER datahub FROM AS PASSWORD = PERM = 20000000; + ``` +2. 
Grant the following privileges to the user: + ```sql + GRANT SELECT ON dbc.columns TO datahub; + GRANT SELECT ON dbc.databases TO datahub; + GRANT SELECT ON dbc.tables TO datahub; + GRANT SELECT ON DBC.All_RI_ChildrenV TO datahub; + GRANT SELECT ON DBC.ColumnsV TO datahub; + GRANT SELECT ON DBC.IndicesV TO datahub; + GRANT SELECT ON dbc.TableTextV TO datahub; + GRANT SELECT ON dbc.TablesV TO datahub; + GRANT SELECT ON dbc.dbqlogtbl TO datahub; -- if lineage or usage extraction is enabled + ``` + + If you want to run profiling, you need to grant select permission on all the tables you want to profile. + +3. If lineage or usage extraction is enabled, please check that query logging is enabled and that it is set to a size which +will fit your queries (the default query text size Teradata captures is max 200 chars) + An example of how you can set it for all users: + ```sql + REPLACE QUERY LOGGING LIMIT SQLTEXT=2000 ON ALL; + ``` + See more about query logging here: + [https://docs.teradata.com/r/Teradata-VantageCloud-Lake/Database-Reference/Database-Administration/Tracking-Query-Behavior-with-Database-Query-Logging-Operational-DBAs]() diff --git a/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml b/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml new file mode 100644 index 0000000000000..8cf07ba4c3a01 --- /dev/null +++ b/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml @@ -0,0 +1,17 @@ +pipeline_name: my-teradata-ingestion-pipeline +source: + type: teradata + config: + host_port: "myteradatainstance.teradata.com:1025" + #platform_instance: "myteradatainstance" + username: myuser + password: mypassword + #database_pattern: + # allow: + # - "demo_user" + # ignoreCase: true + include_table_lineage: true + include_usage_statistics: true + stateful_ingestion: + enabled: true +sink: diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 61e7b684682a4..3ea9a2ea61d74 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -373,6 +373,7 @@ # FIXME: I don't think tableau uses sqllineage anymore so we should be able # to remove that dependency. "tableau": {"tableauserverclient>=0.17.0"} | sqllineage_lib | sqlglot_lib, + "teradata": sql_common | {"teradatasqlalchemy>=17.20.0.0"}, "trino": sql_common | trino, "starburst-trino-usage": sql_common | usage_common | trino, "nifi": {"requests", "packaging", "requests-gssapi"}, @@ -499,6 +500,7 @@ "s3", "snowflake", "tableau", + "teradata", "trino", "hive", "starburst-trino-usage", @@ -597,6 +599,7 @@ "tableau = datahub.ingestion.source.tableau:TableauSource", "openapi = datahub.ingestion.source.openapi:OpenApiSource", "metabase = datahub.ingestion.source.metabase:MetabaseSource", + "teradata = datahub.ingestion.source.sql.teradata:TeradataSource", "trino = datahub.ingestion.source.sql.trino:TrinoSource", "starburst-trino-usage = datahub.ingestion.source.usage.starburst_trino_usage:TrinoUsageSource", "nifi = datahub.ingestion.source.nifi:NifiSource", diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py new file mode 100644 index 0000000000000..dd11cd840bed9 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py @@ -0,0 +1,228 @@ +import logging +from dataclasses import dataclass +from typing import Iterable, Optional, Set, Union + +# This import verifies that the dependencies are available.
+import teradatasqlalchemy # noqa: F401 +import teradatasqlalchemy.types as custom_types +from pydantic.fields import Field +from sqlalchemy import create_engine +from sqlalchemy.engine import Engine + +from datahub.configuration.common import AllowDenyPattern +from datahub.configuration.time_window_config import BaseTimeWindowConfig +from datahub.emitter.sql_parsing_builder import SqlParsingBuilder +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.decorators import ( + SourceCapability, + SupportStatus, + capability, + config_class, + platform_name, + support_status, +) +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.source.sql.sql_common import SqlWorkUnit, register_custom_type +from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport +from datahub.ingestion.source.sql.two_tier_sql_source import ( + TwoTierSQLAlchemyConfig, + TwoTierSQLAlchemySource, +) +from datahub.ingestion.source.usage.usage_common import BaseUsageConfig +from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport +from datahub.ingestion.source_report.time_window import BaseTimeWindowReport +from datahub.metadata.com.linkedin.pegasus2avro.schema import ( + BytesTypeClass, + TimeTypeClass, +) +from datahub.utilities.sqlglot_lineage import SchemaResolver, sqlglot_lineage + +logger: logging.Logger = logging.getLogger(__name__) + +register_custom_type(custom_types.JSON, BytesTypeClass) +register_custom_type(custom_types.INTERVAL_DAY, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_DAY_TO_SECOND, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_DAY_TO_MINUTE, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_DAY_TO_HOUR, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_SECOND, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_MINUTE, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_MINUTE_TO_SECOND, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_HOUR, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_HOUR_TO_MINUTE, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_HOUR_TO_SECOND, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_MONTH, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_YEAR, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_YEAR_TO_MONTH, TimeTypeClass) +register_custom_type(custom_types.MBB, BytesTypeClass) +register_custom_type(custom_types.MBR, BytesTypeClass) +register_custom_type(custom_types.GEOMETRY, BytesTypeClass) +register_custom_type(custom_types.TDUDT, BytesTypeClass) +register_custom_type(custom_types.XML, BytesTypeClass) + + +@dataclass +class TeradataReport(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowReport): + num_queries_parsed: int = 0 + num_table_parse_failures: int = 0 + + +class BaseTeradataConfig(TwoTierSQLAlchemyConfig): + scheme = Field(default="teradatasql", description="database scheme") + + +class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig): + database_pattern = Field( + default=AllowDenyPattern(deny=["dbc"]), + description="Regex patterns for databases to filter in ingestion.", + ) + include_table_lineage = Field( + default=False, + description="Whether to include table lineage in the ingestion. 
" + "This requires to have the table lineage feature enabled.", + ) + + usage: BaseUsageConfig = Field( + description="The usage config to use when generating usage statistics", + default=BaseUsageConfig(), + ) + + use_schema_resolver: bool = Field( + default=True, + description="Read SchemaMetadata aspects from DataHub to aid in SQL parsing. Turn off only for testing.", + hidden_from_docs=True, + ) + + default_db: Optional[str] = Field( + default=None, + description="The default database to use for unqualified table names", + ) + + include_usage_statistics: bool = Field( + default=False, + description="Generate usage statistic.", + ) + + +@platform_name("Teradata") +@config_class(TeradataConfig) +@support_status(SupportStatus.TESTING) +@capability(SourceCapability.DOMAINS, "Enabled by default") +@capability(SourceCapability.CONTAINERS, "Enabled by default") +@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.DELETION_DETECTION, "Optionally enabled via configuration") +@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") +@capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration") +@capability(SourceCapability.LINEAGE_FINE, "Optionally enabled via configuration") +@capability(SourceCapability.USAGE_STATS, "Optionally enabled via configuration") +class TeradataSource(TwoTierSQLAlchemySource): + """ + This plugin extracts the following: + + - Metadata for databases, schemas, views, and tables + - Column types associated with each table + - Table, row, and column statistics via optional SQL profiling + """ + + config: TeradataConfig + + LINEAGE_QUERY: str = """SELECT ProcID, UserName as "user", StartTime AT TIME ZONE 'GMT' as "timestamp", DefaultDatabase as default_database, QueryText as query + FROM "DBC".DBQLogTbl + where ErrorCode = 0 + and QueryText like 'create table demo_user.test_lineage%' + and "timestamp" >= TIMESTAMP '{start_time}' + and "timestamp" < TIMESTAMP '{end_time}' + """ + urns: Optional[Set[str]] + + def __init__(self, config: TeradataConfig, ctx: PipelineContext): + super().__init__(config, ctx, "teradata") + + self.report: TeradataReport = TeradataReport() + self.graph: Optional[DataHubGraph] = ctx.graph + + if self.graph: + if self.config.use_schema_resolver: + self.schema_resolver = ( + self.graph.initialize_schema_resolver_from_datahub( + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + ) + self.urns = self.schema_resolver.get_urns() + else: + self.schema_resolver = self.graph._make_schema_resolver( + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + self.urns = None + else: + self.schema_resolver = SchemaResolver( + platform=self.platform, + platform_instance=self.config.platform_instance, + graph=None, + env=self.config.env, + ) + self.urns = None + + self.builder: SqlParsingBuilder = SqlParsingBuilder( + usage_config=self.config.usage + if self.config.include_usage_statistics + else None, + generate_lineage=self.config.include_table_lineage, + generate_usage_statistics=self.config.include_usage_statistics, + generate_operations=self.config.usage.include_operational_stats, + ) + + @classmethod + def create(cls, config_dict, ctx): + config = TeradataConfig.parse_obj(config_dict) + return cls(config, ctx) + + def get_audit_log_mcps(self) -> Iterable[MetadataWorkUnit]: + engine = self.get_metadata_engine() + for entry in engine.execute( + 
self.LINEAGE_QUERY.format( + start_time=self.config.start_time, end_time=self.config.end_time + ) + ): + self.report.num_queries_parsed += 1 + if self.report.num_queries_parsed % 1000 == 0: + logger.info(f"Parsed {self.report.num_queries_parsed} queries") + + result = sqlglot_lineage( + sql=entry.query, + schema_resolver=self.schema_resolver, + default_db=None, + default_schema=entry.default_database + if entry.default_database + else self.config.default_db, + ) + if result.debug_info.table_error: + logger.debug( + f"Error parsing table lineage, {result.debug_info.table_error}" + ) + self.report.num_table_parse_failures += 1 + continue + + yield from self.builder.process_sql_parsing_result( + result, + query=entry.query, + query_timestamp=entry.timestamp, + user=f"urn:li:corpuser:{entry.user}", + include_urns=self.urns, + ) + + def get_metadata_engine(self) -> Engine: + url = self.config.get_sql_alchemy_url() + logger.debug(f"sql_alchemy_url={url}") + return create_engine(url, **self.config.options) + + def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: + yield from super().get_workunits_internal() + if self.config.include_table_lineage or self.config.include_usage_statistics: + self.report.report_ingestion_stage_start("audit log extraction") + yield from self.get_audit_log_mcps() + yield from self.builder.gen_workunits() diff --git a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py index 8516a7054a9cd..b3b1331db768b 100644 --- a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py +++ b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py @@ -70,11 +70,14 @@ def assert_sql_result( sql: str, *, dialect: str, + platform_instance: Optional[str] = None, expected_file: pathlib.Path, schemas: Optional[Dict[str, SchemaInfo]] = None, **kwargs: Any, ) -> None: - schema_resolver = SchemaResolver(platform=dialect) + schema_resolver = SchemaResolver( + platform=dialect, platform_instance=platform_instance + ) if schemas: for urn, schema in schemas.items(): schema_resolver.add_raw_schema_info(urn, schema) diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 349eb40a5e865..c830ec8c02fd4 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -482,6 +482,11 @@ def _column_level_lineage( # noqa: C901 # Our snowflake source lowercases column identifiers, so we are forced # to do fuzzy (case-insensitive) resolution instead of exact resolution. "snowflake", + # Teradata column names are case-insensitive. + # A name, even when enclosed in double quotation marks, is not case sensitive. For example, CUSTOMER and Customer are the same. 
+ # See more below: + # https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/acreldb/n0ejgx4895bofnn14rlguktfx5r3.htm + "teradata", } sqlglot_db_schema = sqlglot.MappingSchema( diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json new file mode 100644 index 0000000000000..b0351a7e07ad2 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json @@ -0,0 +1,38 @@ +{ + "query_type": "CREATE", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_diagnoses,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_features,PROD)" + ], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)" + ], + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)", + "column": "PatientId", + "native_column_type": "INTEGER()" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_diagnoses,PROD)", + "column": "PatientId" + } + ] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)", + "column": "BMI", + "native_column_type": "FLOAT()" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_features,PROD)", + "column": "BMI" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index bb6e5f1581754..059add8db67e4 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -630,3 +630,45 @@ def test_snowflake_column_cast(): # TODO: Add a test for setting platform_instance or env + + +def test_teradata_default_normalization(): + assert_sql_result( + """ +create table demo_user.test_lineage2 as + ( + select + ppd.PatientId, + ppf.bmi + from + demo_user.pima_patient_features ppf + join demo_user.pima_patient_diagnoses ppd on + ppd.PatientId = ppf.PatientId + ) with data; +""", + dialect="teradata", + default_schema="dbc", + platform_instance="myteradata", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_diagnoses,PROD)": { + "HasDiabetes": "INTEGER()", + "PatientId": "INTEGER()", + }, + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_features,PROD)": { + "Age": "INTEGER()", + "BMI": "FLOAT()", + "BloodP": "INTEGER()", + "DiPedFunc": "FLOAT()", + "NumTimesPrg": "INTEGER()", + "PatientId": "INTEGER()", + "PlGlcConc": "INTEGER()", + "SkinThick": "INTEGER()", + "TwoHourSerIns": "INTEGER()", + }, + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)": { + "BMI": "FLOAT()", + "PatientId": "INTEGER()", + }, + }, + expected_file=RESOURCE_DIR / "test_teradata_default_normalization.json", + ) From 71c9bd3a495c1f3663d2268088f04d56dd8c37c9 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 13 Oct 2023 11:48:22 +0530 Subject: [PATCH 44/98] ci(ingest): update base requirements (#8995) --- .../base-requirements.txt | 398 +++++++++--------- 1 file changed, 205 insertions(+), 193 deletions(-) 
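As an aside on the Teradata lineage flow added above: `get_audit_log_mcps` runs each audit-log query through `sqlglot_lineage` with a `SchemaResolver`, which is also what the unit test at the end of that commit exercises. The snippet below is a minimal, hedged sketch of that call path for experimenting outside the source; the SQL statement, platform instance, and schema name are made-up examples, not values taken from the patch.

```python
from datahub.utilities.sqlglot_lineage import SchemaResolver, sqlglot_lineage

# Resolver with no DataHub connection; the source above instead pre-populates it
# from DataHub when use_schema_resolver is enabled.
schema_resolver = SchemaResolver(
    platform="teradata",
    platform_instance="myteradata",  # illustrative instance name
    graph=None,
    env="PROD",
)

# Made-up Teradata CTAS statement standing in for a DBQLogTbl QueryText entry.
result = sqlglot_lineage(
    sql="create table demo_user.test_lineage2 as (select PatientId, BMI from demo_user.pima_patient_features) with data",
    schema_resolver=schema_resolver,
    default_db=None,
    default_schema="demo_user",
)

if result.debug_info.table_error:
    print(f"Table-level parsing failed: {result.debug_info.table_error}")
else:
    print(result.in_tables)   # upstream dataset urns
    print(result.out_tables)  # downstream dataset urns
```

Without registered schemas the resolver still yields table-level lineage; column-level lineage needs the schema info that the source normally pulls from DataHub.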
diff --git a/docker/datahub-ingestion-base/base-requirements.txt b/docker/datahub-ingestion-base/base-requirements.txt index 82d9a93a9a2c3..eb082d50b3020 100644 --- a/docker/datahub-ingestion-base/base-requirements.txt +++ b/docker/datahub-ingestion-base/base-requirements.txt @@ -2,62 +2,58 @@ # pyspark==3.0.3 # pydeequ==1.0.1 -acryl-datahub-classify==0.0.6 -acryl-iceberg-legacy==0.0.4 -acryl-PyHive==0.6.13 -aenum==3.1.12 -aiohttp==3.8.4 +acryl-datahub-classify==0.0.8 +acryl-PyHive==0.6.14 +acryl-sqlglot==18.5.2.dev45 +aenum==3.1.15 +aiohttp==3.8.6 aiosignal==1.3.1 -alembic==1.11.1 +alembic==1.12.0 altair==4.2.0 -anyio==3.7.0 -apache-airflow==2.6.1 -apache-airflow-providers-common-sql==1.5.1 -apache-airflow-providers-ftp==3.4.1 -apache-airflow-providers-http==4.4.1 -apache-airflow-providers-imap==3.2.1 -apache-airflow-providers-sqlite==3.4.1 -apispec==5.2.2 +anyio==3.7.1 +apache-airflow==2.7.2 +apache-airflow-providers-common-sql==1.7.2 +apache-airflow-providers-ftp==3.5.2 +apache-airflow-providers-http==4.5.2 +apache-airflow-providers-imap==3.3.2 +apache-airflow-providers-sqlite==3.4.3 +apispec==6.3.0 appdirs==1.4.4 appnope==0.1.3 -argcomplete==3.0.8 -argon2-cffi==21.3.0 +argcomplete==3.1.2 +argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 asgiref==3.7.2 asn1crypto==1.5.1 -asttokens==2.2.1 -async-timeout==4.0.2 +asttokens==2.4.0 +async-timeout==4.0.3 asynch==0.2.2 attrs==23.1.0 avro==1.10.2 -avro-gen3==0.7.10 -azure-core==1.26.4 -azure-identity==1.10.0 -azure-storage-blob==12.16.0 -azure-storage-file-datalake==12.11.0 -Babel==2.12.1 +avro-gen3==0.7.11 +Babel==2.13.0 backcall==0.2.0 backoff==2.2.1 beautifulsoup4==4.12.2 -bleach==6.0.0 -blinker==1.6.2 -blis==0.7.9 -boto3==1.26.142 -botocore==1.29.142 +bleach==6.1.0 +blinker==1.6.3 +blis==0.7.11 +boto3==1.28.62 +botocore==1.31.62 bowler==0.9.0 -bracex==2.3.post1 +bracex==2.4 cached-property==1.5.2 cachelib==0.9.0 cachetools==5.3.1 -catalogue==2.0.8 -cattrs==22.2.0 -certifi==2023.5.7 -cffi==1.15.1 -chardet==5.1.0 -charset-normalizer==2.1.1 +catalogue==2.0.10 +cattrs==23.1.2 +certifi==2023.7.22 +cffi==1.16.0 +chardet==5.2.0 +charset-normalizer==3.3.0 ciso8601==2.3.0 -click==8.1.3 -click-default-group==1.2.2 +click==8.1.7 +click-default-group==1.2.4 click-spinner==0.1.10 clickclick==20.10.2 clickhouse-cityhash==1.0.2.4 @@ -66,205 +62,217 @@ clickhouse-sqlalchemy==0.2.4 cloudpickle==2.2.1 colorama==0.4.6 colorlog==4.8.0 -confection==0.0.4 +comm==0.1.4 +confection==0.1.3 ConfigUpdater==3.1.1 confluent-kafka==1.8.2 connexion==2.14.2 cron-descriptor==1.4.0 -croniter==1.3.15 -cryptography==37.0.4 +croniter==2.0.1 +cryptography==41.0.4 cx-Oracle==8.3.0 -cymem==2.0.7 -dask==2023.5.1 -databricks-cli==0.17.7 +cymem==2.0.8 +dask==2023.9.3 +databricks-cli==0.18.0 databricks-dbapi==0.6.0 -databricks-sdk==0.1.8 -debugpy==1.6.7 +databricks-sdk==0.10.0 +debugpy==1.8.0 decorator==5.1.1 defusedxml==0.7.1 -deltalake==0.9.0 +deltalake==0.11.0 Deprecated==1.2.14 -dill==0.3.6 -dnspython==2.3.0 -docker==6.1.2 +dill==0.3.7 +dnspython==2.4.2 +docker==6.1.3 docutils==0.20.1 ecdsa==0.18.0 elasticsearch==7.13.4 email-validator==1.3.1 entrypoints==0.4 et-xmlfile==1.1.0 -exceptiongroup==1.1.1 -executing==1.2.0 -expandvars==0.9.0 -fastapi==0.95.2 -fastavro==1.7.4 -fastjsonschema==2.17.1 -feast==0.29.0 -filelock==3.12.0 +exceptiongroup==1.1.3 +executing==2.0.0 +expandvars==0.11.0 +fastapi==0.103.2 +fastavro==1.8.4 +fastjsonschema==2.18.1 +feast==0.31.1 +filelock==3.12.4 fissix==21.11.13 Flask==2.2.5 flatdict==4.0.1 -frozenlist==1.3.3 -fsspec==2023.5.0 +frozenlist==1.4.0 
+fsspec==2023.9.2 future==0.18.3 -GeoAlchemy2==0.13.3 +GeoAlchemy2==0.14.1 gitdb==4.0.10 -GitPython==3.1.31 -google-api-core==2.11.0 -google-auth==2.19.0 -google-cloud-appengine-logging==1.3.0 +GitPython==3.1.37 +google-api-core==2.12.0 +google-auth==2.23.3 +google-cloud-appengine-logging==1.3.2 google-cloud-audit-log==0.2.5 -google-cloud-bigquery==3.10.0 -google-cloud-bigquery-storage==2.19.1 -google-cloud-core==2.3.2 +google-cloud-bigquery==3.12.0 +google-cloud-core==2.3.3 google-cloud-datacatalog-lineage==0.2.2 google-cloud-logging==3.5.0 google-crc32c==1.5.0 -google-resumable-media==2.5.0 -googleapis-common-protos==1.59.0 +google-re2==1.1 +google-resumable-media==2.6.0 +googleapis-common-protos==1.60.0 gql==3.4.1 graphql-core==3.2.3 graphviz==0.20.1 great-expectations==0.15.50 -greenlet==2.0.2 +greenlet==3.0.0 grpc-google-iam-v1==0.12.6 -grpcio==1.54.2 -grpcio-reflection==1.54.2 -grpcio-status==1.54.2 -grpcio-tools==1.54.2 -gssapi==1.8.2 -gunicorn==20.1.0 +grpcio==1.59.0 +grpcio-reflection==1.59.0 +grpcio-status==1.59.0 +grpcio-tools==1.59.0 +gssapi==1.8.3 +gunicorn==21.2.0 h11==0.14.0 -hmsclient==0.1.1 -httpcore==0.17.2 -httptools==0.5.0 -httpx==0.24.1 +httpcore==0.18.0 +httptools==0.6.0 +httpx==0.25.0 humanfriendly==10.0 idna==3.4 -ijson==3.2.0.post0 -importlib-metadata==6.6.0 -importlib-resources==5.12.0 +ijson==3.2.3 +importlib-metadata==6.8.0 +importlib-resources==6.1.0 inflection==0.5.1 ipaddress==1.0.23 ipykernel==6.17.1 -ipython==8.13.2 +ipython==8.16.1 ipython-genutils==0.2.0 -ipywidgets==8.0.6 +ipywidgets==8.1.1 iso3166==2.1.1 isodate==0.6.1 itsdangerous==2.1.2 -jedi==0.18.2 +jedi==0.19.1 Jinja2==3.1.2 jmespath==1.0.1 JPype1==1.4.1 -jsonlines==3.1.0 -jsonpatch==1.32 -jsonpointer==2.3 +jsonlines==4.0.0 +jsonpatch==1.33 +jsonpointer==2.4 jsonref==1.1.0 -jsonschema==4.17.3 +jsonschema==4.19.1 +jsonschema-specifications==2023.7.1 jupyter-server==1.24.0 jupyter_client==7.4.9 jupyter_core==4.12.0 jupyterlab-pygments==0.2.2 -jupyterlab-widgets==3.0.7 +jupyterlab-widgets==3.0.9 langcodes==3.3.0 lark==1.1.4 lazy-object-proxy==1.9.0 leb128==1.0.5 -limits==3.5.0 +limits==3.6.0 linear-tsv==1.1.0 linkify-it-py==2.0.2 lkml==1.3.1 locket==1.0.0 lockfile==0.12.2 looker-sdk==23.0.0 -lxml==4.9.2 +lxml==4.9.3 lz4==4.3.2 makefun==1.15.1 Mako==1.2.4 -Markdown==3.4.3 -markdown-it-py==2.2.0 -MarkupSafe==2.1.2 -marshmallow==3.19.0 -marshmallow-enum==1.5.1 +Markdown==3.5 +markdown-it-py==3.0.0 +MarkupSafe==2.1.3 +marshmallow==3.20.1 marshmallow-oneofschema==3.0.1 marshmallow-sqlalchemy==0.26.1 matplotlib-inline==0.1.6 -mdit-py-plugins==0.3.5 +mdit-py-plugins==0.4.0 mdurl==0.1.2 -mistune==2.0.5 +mistune==3.0.2 mixpanel==4.10.0 -mmh3==4.0.0 -more-itertools==9.1.0 +mlflow-skinny==2.7.1 +mmh3==4.0.1 +mmhash3==3.0.1 +more-itertools==10.1.0 moreorless==0.4.0 -moto==4.1.10 -msal==1.16.0 -msal-extensions==1.0.0 +moto==4.2.5 +msal==1.22.0 multidict==6.0.4 -murmurhash==1.0.9 -mypy==1.3.0 +murmurhash==1.0.10 +mypy==1.6.0 mypy-extensions==1.0.0 nbclassic==1.0.0 nbclient==0.6.3 -nbconvert==7.4.0 -nbformat==5.8.0 -nest-asyncio==1.5.6 +nbconvert==7.9.2 +nbformat==5.9.1 +nest-asyncio==1.5.8 networkx==3.1 -notebook==6.5.4 +notebook==6.5.6 notebook_shim==0.2.3 -numpy==1.24.3 +numpy==1.26.0 oauthlib==3.2.2 okta==1.7.0 +openlineage-airflow==1.2.0 +openlineage-integration-common==1.2.0 +openlineage-python==1.2.0 +openlineage_sql==1.2.0 openpyxl==3.1.2 +opentelemetry-api==1.20.0 +opentelemetry-exporter-otlp==1.20.0 +opentelemetry-exporter-otlp-proto-common==1.20.0 +opentelemetry-exporter-otlp-proto-grpc==1.20.0 
+opentelemetry-exporter-otlp-proto-http==1.20.0 +opentelemetry-proto==1.20.0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 ordered-set==4.1.0 oscrypto==1.3.0 -packaging==23.1 +packaging==23.2 pandas==1.5.3 pandavro==1.5.2 pandocfilters==1.5.0 -parse==1.19.0 +parse==1.19.1 parso==0.8.3 -partd==1.4.0 -pathspec==0.9.0 -pathy==0.10.1 +partd==1.4.1 +pathspec==0.11.2 +pathy==0.10.2 pendulum==2.1.2 pexpect==4.8.0 phonenumbers==8.13.0 pickleshare==0.7.5 -platformdirs==3.5.1 -pluggy==1.0.0 -portalocker==2.7.0 -preshed==3.0.8 +platformdirs==3.11.0 +pluggy==1.3.0 +preshed==3.0.9 prison==0.2.1 progressbar2==4.2.0 -prometheus-client==0.17.0 -prompt-toolkit==3.0.38 -proto-plus==1.22.2 -protobuf==4.23.2 +prometheus-client==0.17.1 +prompt-toolkit==3.0.39 +proto-plus==1.22.3 +protobuf==4.24.4 psutil==5.9.5 -psycopg2-binary==2.9.6 +psycopg2-binary==2.9.9 ptyprocess==0.7.0 pure-eval==0.2.2 pure-sasl==0.6.2 -py-partiql-parser==0.3.0 -pyarrow==8.0.0 +py-partiql-parser==0.3.7 +pyarrow==11.0.0 pyasn1==0.5.0 pyasn1-modules==0.3.0 pyathena==2.4.1 pycountry==22.3.5 pycparser==2.21 -pycryptodome==3.18.0 -pycryptodomex==3.18.0 -pydantic==1.10.8 -pydash==7.0.3 +pycryptodome==3.19.0 +pycryptodomex==3.19.0 +pydantic==1.10.13 +pydash==7.0.6 pydruid==0.6.5 -Pygments==2.15.1 -pymongo==4.3.3 -PyMySQL==1.0.3 -pyOpenSSL==22.0.0 +Pygments==2.16.1 +pyiceberg==0.4.0 +pymongo==4.5.0 +PyMySQL==1.1.0 +pyOpenSSL==23.2.0 pyparsing==3.0.9 -pyrsistent==0.19.3 -pyspnego==0.9.0 +pyspnego==0.10.2 python-daemon==3.0.1 python-dateutil==2.8.2 python-dotenv==1.0.0 @@ -272,111 +280,115 @@ python-jose==3.3.0 python-ldap==3.4.3 python-nvd3==0.15.0 python-slugify==8.0.1 -python-stdnum==1.18 -python-tds==1.12.0 -python-utils==3.6.0 +python-stdnum==1.19 +python-tds==1.13.0 +python-utils==3.8.1 python3-openid==3.2.0 -pytz==2023.3 +pytz==2023.3.post1 pytzdata==2020.1 -PyYAML==6.0 -pyzmq==25.1.0 +PyYAML==6.0.1 +pyzmq==24.0.1 ratelimiter==1.2.0.post0 redash-toolbelt==0.1.9 -redshift-connector==2.0.910 -regex==2023.5.5 -requests==2.28.2 +redshift-connector==2.0.914 +referencing==0.30.2 +regex==2023.10.3 +requests==2.31.0 requests-file==1.5.1 requests-gssapi==1.2.3 requests-ntlm==1.2.0 requests-toolbelt==0.10.1 -responses==0.23.1 -retrying==1.3.4 +responses==0.23.3 rfc3339-validator==0.1.4 rfc3986==2.0.0 -rich==13.3.5 -rich_argparse==1.1.0 +rich==13.6.0 +rich-argparse==1.3.0 +rpds-py==0.10.6 rsa==4.9 ruamel.yaml==0.17.17 -s3transfer==0.6.1 -sasl3==0.2.11 -schwifty==2023.3.0 -scipy==1.10.1 +ruamel.yaml.clib==0.2.8 +s3transfer==0.7.0 +schwifty==2023.9.0 +scipy==1.11.3 scramp==1.4.4 Send2Trash==1.8.2 -setproctitle==1.3.2 -simple-salesforce==1.12.4 +sentry-sdk==1.32.0 +setproctitle==1.3.3 +simple-salesforce==1.12.5 six==1.16.0 -smart-open==6.3.0 -smmap==5.0.0 +smart-open==6.4.0 +smmap==5.0.1 sniffio==1.3.0 -snowflake-connector-python==2.9.0 -snowflake-sqlalchemy==1.4.7 -soupsieve==2.4.1 +snowflake-connector-python==3.2.1 +snowflake-sqlalchemy==1.5.0 +sortedcontainers==2.4.0 +soupsieve==2.5 spacy==3.4.3 spacy-legacy==3.0.12 -spacy-loggers==1.0.4 +spacy-loggers==1.0.5 sql-metadata==2.2.2 -SQLAlchemy==1.4.41 -sqlalchemy-bigquery==1.6.1 +SQLAlchemy==1.4.44 +sqlalchemy-bigquery==1.8.0 SQLAlchemy-JSONField==1.0.1.post0 sqlalchemy-pytds==0.3.5 sqlalchemy-redshift==0.8.14 SQLAlchemy-Utils==0.41.1 -sqlalchemy2-stubs==0.0.2a34 -sqllineage==1.3.6 -sqlparse==0.4.3 -srsly==2.4.6 -stack-data==0.6.2 +sqlalchemy2-stubs==0.0.2a35 +sqllineage==1.3.8 +sqlparse==0.4.4 +srsly==2.4.8 +stack-data==0.6.3 starlette==0.27.0 +strictyaml==1.7.3 
tableauserverclient==0.25 tableschema==1.20.2 tabulate==0.9.0 tabulator==1.53.5 -tenacity==8.2.2 +tenacity==8.2.3 termcolor==2.3.0 terminado==0.17.1 text-unidecode==1.3 -thinc==8.1.10 -thrift==0.16.0 +thinc==8.1.12 +thrift==0.13.0 thrift-sasl==0.4.3 tinycss2==1.2.1 toml==0.10.2 tomli==2.0.1 +tomlkit==0.12.1 toolz==0.12.0 -tornado==6.3.2 -tqdm==4.65.0 +tornado==6.3.3 +tqdm==4.66.1 traitlets==5.2.1.post0 -trino==0.324.0 +trino==0.327.0 typeguard==2.13.3 typer==0.7.0 -types-PyYAML==6.0.12.10 +types-PyYAML==6.0.12.12 typing-inspect==0.9.0 -typing_extensions==4.5.0 -tzlocal==5.0.1 +typing_extensions==4.8.0 +tzlocal==5.1 uc-micro-py==1.0.2 -ujson==5.7.0 +ujson==5.8.0 unicodecsv==0.14.1 -urllib3==1.26.16 -uvicorn==0.22.0 +urllib3==1.26.17 +uvicorn==0.23.2 uvloop==0.17.0 -vertica-python==1.3.2 -vertica-sqlalchemy-dialect==0.0.1 +vertica-python==1.3.5 +vertica-sqlalchemy-dialect==0.0.8 vininfo==1.7.0 volatile==2.1.0 wasabi==0.10.1 -watchfiles==0.19.0 -wcmatch==8.4.1 -wcwidth==0.2.6 +watchfiles==0.20.0 +wcmatch==8.5 +wcwidth==0.2.8 webencodings==0.5.1 -websocket-client==1.5.2 +websocket-client==1.6.4 websockets==11.0.3 Werkzeug==2.2.3 -widgetsnbextension==4.0.7 +widgetsnbextension==4.0.9 wrapt==1.15.0 -WTForms==3.0.1 +WTForms==3.1.0 xlrd==2.0.1 xmltodict==0.13.0 yarl==1.9.2 zeep==4.2.1 -zipp==3.15.0 -zstd==1.5.5.1 +zstd==1.5.5.1 \ No newline at end of file From c02cbb31e2896f9b596bc329af2e86459057b37e Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 13 Oct 2023 17:52:53 +0530 Subject: [PATCH 45/98] docs(Acryl DataHub): release notes for 0.2.12 (#9006) --- docs-website/sidebars.js | 1 + .../managed-datahub/release-notes/v_0_2_11.md | 2 +- .../managed-datahub/release-notes/v_0_2_12.md | 30 +++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 docs/managed-datahub/release-notes/v_0_2_12.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 21b3a1d3fe4d3..4fa73c995157a 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -608,6 +608,7 @@ module.exports = { }, { "Managed DataHub Release History": [ + "docs/managed-datahub/release-notes/v_0_2_12", "docs/managed-datahub/release-notes/v_0_2_11", "docs/managed-datahub/release-notes/v_0_2_10", "docs/managed-datahub/release-notes/v_0_2_9", diff --git a/docs/managed-datahub/release-notes/v_0_2_11.md b/docs/managed-datahub/release-notes/v_0_2_11.md index 1f42090848712..c99d10201e097 100644 --- a/docs/managed-datahub/release-notes/v_0_2_11.md +++ b/docs/managed-datahub/release-notes/v_0_2_11.md @@ -7,7 +7,7 @@ Release Availability Date Recommended CLI/SDK --- -- `v0.11.0` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.10.5.5 +- `v0.11.0` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.11.0 - [Deprecation] In LDAP ingestor, the manager_pagination_enabled changed to general pagination_enabled If you are using an older CLI/SDK version then please upgrade it. This applies for all CLI/SDK usages, if you are using it through your terminal, github actions, airflow, in python SDK somewhere, Java SKD etc. This is a strong recommendation to upgrade as we keep on pushing fixes in the CLI and it helps us support you better. 
diff --git a/docs/managed-datahub/release-notes/v_0_2_12.md b/docs/managed-datahub/release-notes/v_0_2_12.md new file mode 100644 index 0000000000000..b13f471d9bf63 --- /dev/null +++ b/docs/managed-datahub/release-notes/v_0_2_12.md @@ -0,0 +1,30 @@ +# v0.2.12 +--- + +Release Availability Date +--- +13-Oct-2023 + +Recommended CLI/SDK +--- +- `v0.11.0.4` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.11.0.4 +- [breaking] Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. +- [breaking] Removed `urn:li:corpuser:datahub` owner for the `Measure`, `Dimension` and `Temporal` tags emitted by Looker and LookML source connectors. +- [breaking] The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. +- [breaking] Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`. +- [breaking] The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. +This is currently enabled by default to preserve compatibility, but will be disabled by default and then removed in the future. +If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup. +Otherwise, we recommend soft deleting all databricks data via the DataHub CLI: +`datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`. + + +If you are using an older CLI/SDK version then please upgrade it. This applies for all CLI/SDK usages, if you are using it through your terminal, github actions, airflow, in python SDK somewhere, Java SKD etc. This is a strong recommendation to upgrade as we keep on pushing fixes in the CLI and it helps us support you better. + + +## Release Changelog +--- +- Since `v0.2.11` these changes from OSS DataHub https://github.com/datahub-project/datahub/compare/75252a3d9f6a576904be5a0790d644b9ae2df6ac...10a190470e8c932b6d34cba49de7dbcba687a088 have been pulled in. 
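As an aside to the Unity Catalog `include_metastore` breaking change noted above, a hedged sketch of the migration path when stateful ingestion is not enabled (with stateful ingestion enabled, setting `include_metastore: false` alone is sufficient per the note; the recipe file name below is a placeholder):

```bash
# 1. Soft-delete previously ingested Databricks metadata (command taken verbatim
#    from the breaking-change note above).
datahub delete --platform databricks --soft

# 2. Add `include_metastore: false` under source.config in your unity-catalog
#    recipe, then re-ingest. The recipe path here is hypothetical.
datahub ingest -c unity_catalog_recipe.yml
```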
+ +## Some notable features in this SaaS release +- Nested Domains available in this release From 6bc742535379f6cc4558daa67b6561d549d6e607 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Fri, 13 Oct 2023 12:36:18 -0400 Subject: [PATCH 46/98] feat(cli/datacontract): Add data quality assertion support (#8968) Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Co-authored-by: Harshal Sheth Co-authored-by: Aseem Bansal --- .../api/entities/datacontract/assertion.py | 7 + .../datacontract/assertion_operator.py | 162 ++++++++++++++++++ .../datacontract/data_quality_assertion.py | 60 ++++--- .../api/entities/datacontract/datacontract.py | 2 +- .../datacontract/freshness_assertion.py | 54 +++--- .../entities/datacontract/schema_assertion.py | 17 +- .../api/entities/datacontract/__init__.py | 0 .../test_data_quality_assertion.py | 55 ++++++ 8 files changed, 292 insertions(+), 65 deletions(-) create mode 100644 metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py create mode 100644 metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py create mode 100644 metadata-ingestion/tests/unit/api/entities/datacontract/__init__.py create mode 100644 metadata-ingestion/tests/unit/api/entities/datacontract/test_data_quality_assertion.py diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py new file mode 100644 index 0000000000000..c45d4ddc92458 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py @@ -0,0 +1,7 @@ +from typing import Optional + +from datahub.configuration import ConfigModel + + +class BaseAssertion(ConfigModel): + description: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py new file mode 100644 index 0000000000000..a41b0f7aafd9f --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py @@ -0,0 +1,162 @@ +from typing import Optional, Union + +from typing_extensions import Literal, Protocol + +from datahub.configuration import ConfigModel +from datahub.metadata.schema_classes import ( + AssertionStdOperatorClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionStdParameterTypeClass, +) + + +class Operator(Protocol): + """Specification for an assertion operator. + + This class exists only for documentation (not used in typing checking). + """ + + operator: str + + def id(self) -> str: + ... + + def generate_parameters(self) -> AssertionStdParametersClass: + ... 
+ + +def _generate_assertion_std_parameter( + value: Union[str, int, float] +) -> AssertionStdParameterClass: + if isinstance(value, str): + return AssertionStdParameterClass( + value=value, type=AssertionStdParameterTypeClass.STRING + ) + elif isinstance(value, (int, float)): + return AssertionStdParameterClass( + value=str(value), type=AssertionStdParameterTypeClass.NUMBER + ) + else: + raise ValueError( + f"Unsupported assertion parameter {value} of type {type(value)}" + ) + + +Param = Union[str, int, float] + + +def _generate_assertion_std_parameters( + value: Optional[Param] = None, + min_value: Optional[Param] = None, + max_value: Optional[Param] = None, +) -> AssertionStdParametersClass: + return AssertionStdParametersClass( + value=_generate_assertion_std_parameter(value) if value else None, + minValue=_generate_assertion_std_parameter(min_value) if min_value else None, + maxValue=_generate_assertion_std_parameter(max_value) if max_value else None, + ) + + +class EqualToOperator(ConfigModel): + type: Literal["equal_to"] + value: Union[str, int, float] + + operator: str = AssertionStdOperatorClass.EQUAL_TO + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class BetweenOperator(ConfigModel): + type: Literal["between"] + min: Union[int, float] + max: Union[int, float] + + operator: str = AssertionStdOperatorClass.BETWEEN + + def id(self) -> str: + return f"{self.type}-{self.min}-{self.max}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters( + min_value=self.min, max_value=self.max + ) + + +class LessThanOperator(ConfigModel): + type: Literal["less_than"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.LESS_THAN + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class GreaterThanOperator(ConfigModel): + type: Literal["greater_than"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.GREATER_THAN + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class LessThanOrEqualToOperator(ConfigModel): + type: Literal["less_than_or_equal_to"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class GreaterThanOrEqualToOperator(ConfigModel): + type: Literal["greater_than_or_equal_to"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class NotNullOperator(ConfigModel): + type: Literal["not_null"] + + operator: str = AssertionStdOperatorClass.NOT_NULL + + def id(self) -> str: + return f"{self.type}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters() + + +Operators = Union[ + EqualToOperator, + BetweenOperator, + LessThanOperator, + 
LessThanOrEqualToOperator, + GreaterThanOperator, + GreaterThanOrEqualToOperator, + NotNullOperator, +] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py index a665e95e93c43..6a3944ba36baf 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py @@ -4,6 +4,8 @@ from typing_extensions import Literal import datahub.emitter.mce_builder as builder +from datahub.api.entities.datacontract.assertion import BaseAssertion +from datahub.api.entities.datacontract.assertion_operator import Operators from datahub.configuration.common import ConfigModel from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.schema_classes import ( @@ -14,12 +16,15 @@ AssertionStdParametersClass, AssertionStdParameterTypeClass, AssertionTypeClass, + AssertionValueChangeTypeClass, DatasetAssertionInfoClass, DatasetAssertionScopeClass, + SqlAssertionInfoClass, + SqlAssertionTypeClass, ) -class IdConfigMixin(ConfigModel): +class IdConfigMixin(BaseAssertion): id_raw: Optional[str] = pydantic.Field( default=None, alias="id", @@ -30,25 +35,32 @@ def generate_default_id(self) -> str: raise NotImplementedError -class CustomSQLAssertion(IdConfigMixin, ConfigModel): +class CustomSQLAssertion(IdConfigMixin, BaseAssertion): type: Literal["custom_sql"] - sql: str + operator: Operators = pydantic.Field(discriminator="type") - def generate_dataset_assertion_info( - self, entity_urn: str - ) -> DatasetAssertionInfoClass: - return DatasetAssertionInfoClass( - dataset=entity_urn, - scope=DatasetAssertionScopeClass.UNKNOWN, - fields=[], - operator=AssertionStdOperatorClass._NATIVE_, - aggregation=AssertionStdAggregationClass._NATIVE_, - logic=self.sql, + def generate_default_id(self) -> str: + return f"{self.type}-{self.sql}-{self.operator.id()}" + + def generate_assertion_info(self, entity_urn: str) -> AssertionInfoClass: + sql_assertion_info = SqlAssertionInfoClass( + entity=entity_urn, + statement=self.sql, + operator=self.operator.operator, + parameters=self.operator.generate_parameters(), + # TODO: Support other types of assertions + type=SqlAssertionTypeClass.METRIC, + changeType=AssertionValueChangeTypeClass.ABSOLUTE, + ) + return AssertionInfoClass( + type=AssertionTypeClass.SQL, + sqlAssertion=sql_assertion_info, + description=self.description, ) -class ColumnUniqueAssertion(IdConfigMixin, ConfigModel): +class ColumnUniqueAssertion(IdConfigMixin, BaseAssertion): type: Literal["unique"] # TODO: support multiple columns? 
@@ -57,10 +69,8 @@ class ColumnUniqueAssertion(IdConfigMixin, ConfigModel): def generate_default_id(self) -> str: return f"{self.type}-{self.column}" - def generate_dataset_assertion_info( - self, entity_urn: str - ) -> DatasetAssertionInfoClass: - return DatasetAssertionInfoClass( + def generate_assertion_info(self, entity_urn: str) -> AssertionInfoClass: + dataset_assertion_info = DatasetAssertionInfoClass( dataset=entity_urn, scope=DatasetAssertionScopeClass.DATASET_COLUMN, fields=[builder.make_schema_field_urn(entity_urn, self.column)], @@ -72,6 +82,11 @@ def generate_dataset_assertion_info( ) ), ) + return AssertionInfoClass( + type=AssertionTypeClass.DATASET, + datasetAssertion=dataset_assertion_info, + description=self.description, + ) class DataQualityAssertion(ConfigModel): @@ -92,16 +107,9 @@ def id(self) -> str: def generate_mcp( self, assertion_urn: str, entity_urn: str ) -> List[MetadataChangeProposalWrapper]: - dataset_assertion_info = self.__root__.generate_dataset_assertion_info( - entity_urn - ) - return [ MetadataChangeProposalWrapper( entityUrn=assertion_urn, - aspect=AssertionInfoClass( - type=AssertionTypeClass.DATASET, - datasetAssertion=dataset_assertion_info, - ), + aspect=self.__root__.generate_assertion_info(entity_urn), ) ] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py index 2df446623a9d6..f3c6be55e5fea 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py @@ -54,7 +54,7 @@ class DataContract(ConfigModel): freshness: Optional[FreshnessAssertion] = pydantic.Field(default=None) # TODO: Add a validator to ensure that ids are unique - data_quality: Optional[List[DataQualityAssertion]] = None + data_quality: Optional[List[DataQualityAssertion]] = pydantic.Field(default=None) _original_yaml_dict: Optional[dict] = None diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py index ee8fa1181e614..71741d76b22fc 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py @@ -6,6 +6,7 @@ import pydantic from typing_extensions import Literal +from datahub.api.entities.datacontract.assertion import BaseAssertion from datahub.configuration.common import ConfigModel from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.schema_classes import ( @@ -21,7 +22,7 @@ ) -class CronFreshnessAssertion(ConfigModel): +class CronFreshnessAssertion(BaseAssertion): type: Literal["cron"] cron: str = pydantic.Field( @@ -32,12 +33,30 @@ class CronFreshnessAssertion(ConfigModel): description="The timezone to use for the cron schedule. 
Defaults to UTC.", ) + def generate_freshness_assertion_schedule(self) -> FreshnessAssertionScheduleClass: + return FreshnessAssertionScheduleClass( + type=FreshnessAssertionScheduleTypeClass.CRON, + cron=FreshnessCronScheduleClass( + cron=self.cron, + timezone=self.timezone, + ), + ) + -class FixedIntervalFreshnessAssertion(ConfigModel): +class FixedIntervalFreshnessAssertion(BaseAssertion): type: Literal["interval"] interval: timedelta + def generate_freshness_assertion_schedule(self) -> FreshnessAssertionScheduleClass: + return FreshnessAssertionScheduleClass( + type=FreshnessAssertionScheduleTypeClass.FIXED_INTERVAL, + fixedInterval=FixedIntervalScheduleClass( + unit=CalendarIntervalClass.SECOND, + multiple=int(self.interval.total_seconds()), + ), + ) + class FreshnessAssertion(ConfigModel): __root__: Union[ @@ -51,36 +70,13 @@ def id(self): def generate_mcp( self, assertion_urn: str, entity_urn: str ) -> List[MetadataChangeProposalWrapper]: - freshness = self.__root__ - - if isinstance(freshness, CronFreshnessAssertion): - schedule = FreshnessAssertionScheduleClass( - type=FreshnessAssertionScheduleTypeClass.CRON, - cron=FreshnessCronScheduleClass( - cron=freshness.cron, - timezone=freshness.timezone, - ), - ) - elif isinstance(freshness, FixedIntervalFreshnessAssertion): - schedule = FreshnessAssertionScheduleClass( - type=FreshnessAssertionScheduleTypeClass.FIXED_INTERVAL, - fixedInterval=FixedIntervalScheduleClass( - unit=CalendarIntervalClass.SECOND, - multiple=int(freshness.interval.total_seconds()), - ), - ) - else: - raise ValueError(f"Unknown freshness type {freshness}") - - assertionInfo = AssertionInfoClass( + aspect = AssertionInfoClass( type=AssertionTypeClass.FRESHNESS, freshnessAssertion=FreshnessAssertionInfoClass( entity=entity_urn, type=FreshnessAssertionTypeClass.DATASET_CHANGE, - schedule=schedule, + schedule=self.__root__.generate_freshness_assertion_schedule(), ), + description=self.__root__.description, ) - - return [ - MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=assertionInfo) - ] + return [MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=aspect)] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py index b5b592e01f58f..b62f94e0592fc 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py @@ -6,6 +6,7 @@ import pydantic from typing_extensions import Literal +from datahub.api.entities.datacontract.assertion import BaseAssertion from datahub.configuration.common import ConfigModel from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.extractor.json_schema_util import get_schema_metadata @@ -19,7 +20,7 @@ ) -class JsonSchemaContract(ConfigModel): +class JsonSchemaContract(BaseAssertion): type: Literal["json-schema"] json_schema: dict = pydantic.Field(alias="json-schema") @@ -36,7 +37,7 @@ def _init_private_attributes(self) -> None: ) -class FieldListSchemaContract(ConfigModel, arbitrary_types_allowed=True): +class FieldListSchemaContract(BaseAssertion, arbitrary_types_allowed=True): type: Literal["field-list"] fields: List[SchemaFieldClass] @@ -67,15 +68,13 @@ def id(self): def generate_mcp( self, assertion_urn: str, entity_urn: str ) -> List[MetadataChangeProposalWrapper]: - schema_metadata = self.__root__._schema_metadata - - assertionInfo = AssertionInfoClass( + aspect = 
AssertionInfoClass( type=AssertionTypeClass.DATA_SCHEMA, schemaAssertion=SchemaAssertionInfoClass( - entity=entity_urn, schema=schema_metadata + entity=entity_urn, + schema=self.__root__._schema_metadata, ), + description=self.__root__.description, ) - return [ - MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=assertionInfo) - ] + return [MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=aspect)] diff --git a/metadata-ingestion/tests/unit/api/entities/datacontract/__init__.py b/metadata-ingestion/tests/unit/api/entities/datacontract/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/tests/unit/api/entities/datacontract/test_data_quality_assertion.py b/metadata-ingestion/tests/unit/api/entities/datacontract/test_data_quality_assertion.py new file mode 100644 index 0000000000000..7be8b667a500b --- /dev/null +++ b/metadata-ingestion/tests/unit/api/entities/datacontract/test_data_quality_assertion.py @@ -0,0 +1,55 @@ +from datahub.api.entities.datacontract.data_quality_assertion import ( + DataQualityAssertion, +) +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionStdOperatorClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionStdParameterTypeClass, + AssertionTypeClass, + AssertionValueChangeTypeClass, + SqlAssertionInfoClass, + SqlAssertionTypeClass, +) + + +def test_parse_sql_assertion(): + assertion_urn = "urn:li:assertion:a" + entity_urn = "urn:li:dataset:d" + statement = "SELECT COUNT(*) FROM my_table WHERE value IS NOT NULL" + + d = { + "type": "custom_sql", + "sql": statement, + "operator": {"type": "between", "min": 5, "max": 10}, + } + + assert DataQualityAssertion.parse_obj(d).generate_mcp( + assertion_urn, entity_urn + ) == [ + MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + aspect=AssertionInfoClass( + type=AssertionTypeClass.SQL, + sqlAssertion=SqlAssertionInfoClass( + type=SqlAssertionTypeClass.METRIC, + changeType=AssertionValueChangeTypeClass.ABSOLUTE, + entity=entity_urn, + statement="SELECT COUNT(*) FROM my_table WHERE value IS NOT NULL", + operator=AssertionStdOperatorClass.BETWEEN, + parameters=AssertionStdParametersClass( + minValue=AssertionStdParameterClass( + value="5", + type=AssertionStdParameterTypeClass.NUMBER, + ), + maxValue=AssertionStdParameterClass( + value="10", + type=AssertionStdParameterTypeClass.NUMBER, + ), + ), + ), + ), + ) + ] From 1007204cda802f02a5639e074d95b634b2be9ddf Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Fri, 13 Oct 2023 21:07:19 +0200 Subject: [PATCH 47/98] feat(ingest/teradata): view parsing (#9005) --- .../docs/sources/teradata/teradata_pre.md | 2 +- .../docs/sources/teradata/teradata_recipe.yml | 3 +- .../datahub/ingestion/source/sql/teradata.py | 156 ++++++++++++------ 3 files changed, 106 insertions(+), 55 deletions(-) diff --git a/metadata-ingestion/docs/sources/teradata/teradata_pre.md b/metadata-ingestion/docs/sources/teradata/teradata_pre.md index eb59caa29eb52..7263a59f5ea3d 100644 --- a/metadata-ingestion/docs/sources/teradata/teradata_pre.md +++ b/metadata-ingestion/docs/sources/teradata/teradata_pre.md @@ -18,7 +18,7 @@ If you want to run profiling, you need to grant select permission on all the tables you want to profile. -3. If linege or usage extraction is enabled, please, check if query logging is enabled and it is set to size which +3. 
If lineage or usage extraction is enabled, please, check if query logging is enabled and it is set to size which will fit for your queries (the default query text size Teradata captures is max 200 chars) An example how you can set it for all users: ```sql diff --git a/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml b/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml index 8cf07ba4c3a01..cc94de20110fe 100644 --- a/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml +++ b/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml @@ -3,12 +3,11 @@ source: type: teradata config: host_port: "myteradatainstance.teradata.com:1025" - #platform_instance: "myteradatainstance" username: myuser password: mypassword #database_pattern: # allow: - # - "demo_user" + # - "my_database" # ignoreCase: true include_table_lineage: true include_usage_statistics: true diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py index dd11cd840bed9..6080cf7b371e3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py @@ -1,5 +1,6 @@ import logging from dataclasses import dataclass +from datetime import datetime from typing import Iterable, Optional, Set, Union # This import verifies that the dependencies are available. @@ -11,6 +12,7 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.time_window_config import BaseTimeWindowConfig +from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.sql_parsing_builder import SqlParsingBuilder from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( @@ -32,11 +34,18 @@ from datahub.ingestion.source.usage.usage_common import BaseUsageConfig from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport +from datahub.metadata._schema_classes import ( + MetadataChangeEventClass, + SchemaMetadataClass, + ViewPropertiesClass, +) from datahub.metadata.com.linkedin.pegasus2avro.schema import ( BytesTypeClass, TimeTypeClass, ) +from datahub.utilities.file_backed_collections import FileBackedDict from datahub.utilities.sqlglot_lineage import SchemaResolver, sqlglot_lineage +from datahub.utilities.urns.dataset_urn import DatasetUrn logger: logging.Logger = logging.getLogger(__name__) @@ -64,6 +73,7 @@ @dataclass class TeradataReport(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowReport): num_queries_parsed: int = 0 + num_view_ddl_parsed: int = 0 num_table_parse_failures: int = 0 @@ -82,17 +92,16 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig): "This requires to have the table lineage feature enabled.", ) + include_view_lineage = Field( + default=True, + description="Whether to include view lineage in the ingestion. " + "This requires to have the view lineage feature enabled.", + ) usage: BaseUsageConfig = Field( description="The usage config to use when generating usage statistics", default=BaseUsageConfig(), ) - use_schema_resolver: bool = Field( - default=True, - description="Read SchemaMetadata aspects from DataHub to aid in SQL parsing. 
Turn off only for testing.", - hidden_from_docs=True, - ) - default_db: Optional[str] = Field( default=None, description="The default database to use for unqualified table names", @@ -141,46 +150,47 @@ def __init__(self, config: TeradataConfig, ctx: PipelineContext): self.report: TeradataReport = TeradataReport() self.graph: Optional[DataHubGraph] = ctx.graph - if self.graph: - if self.config.use_schema_resolver: - self.schema_resolver = ( - self.graph.initialize_schema_resolver_from_datahub( - platform=self.platform, - platform_instance=self.config.platform_instance, - env=self.config.env, - ) - ) - self.urns = self.schema_resolver.get_urns() - else: - self.schema_resolver = self.graph._make_schema_resolver( - platform=self.platform, - platform_instance=self.config.platform_instance, - env=self.config.env, - ) - self.urns = None - else: - self.schema_resolver = SchemaResolver( - platform=self.platform, - platform_instance=self.config.platform_instance, - graph=None, - env=self.config.env, - ) - self.urns = None - self.builder: SqlParsingBuilder = SqlParsingBuilder( usage_config=self.config.usage if self.config.include_usage_statistics else None, - generate_lineage=self.config.include_table_lineage, + generate_lineage=True, generate_usage_statistics=self.config.include_usage_statistics, generate_operations=self.config.usage.include_operational_stats, ) + self.schema_resolver = SchemaResolver( + platform=self.platform, + platform_instance=self.config.platform_instance, + graph=None, + env=self.config.env, + ) + + self._view_definition_cache: FileBackedDict[str] = FileBackedDict() + @classmethod def create(cls, config_dict, ctx): config = TeradataConfig.parse_obj(config_dict) return cls(config, ctx) + def get_view_lineage(self) -> Iterable[MetadataWorkUnit]: + for key in self._view_definition_cache.keys(): + view_definition = self._view_definition_cache[key] + dataset_urn = DatasetUrn.create_from_string(key) + + db_name: Optional[str] = None + # We need to get the default db from the dataset urn otherwise the builder generates the wrong urns + if "." 
in dataset_urn.get_dataset_name(): + db_name = dataset_urn.get_dataset_name().split(".", 1)[0] + + self.report.num_view_ddl_parsed += 1 + if self.report.num_view_ddl_parsed % 1000 == 0: + logger.info(f"Parsed {self.report.num_queries_parsed} view ddl") + + yield from self.gen_lineage_from_query( + query=view_definition, default_database=db_name, is_view_ddl=True + ) + def get_audit_log_mcps(self) -> Iterable[MetadataWorkUnit]: engine = self.get_metadata_engine() for entry in engine.execute( @@ -192,27 +202,43 @@ def get_audit_log_mcps(self) -> Iterable[MetadataWorkUnit]: if self.report.num_queries_parsed % 1000 == 0: logger.info(f"Parsed {self.report.num_queries_parsed} queries") - result = sqlglot_lineage( - sql=entry.query, - schema_resolver=self.schema_resolver, - default_db=None, - default_schema=entry.default_database - if entry.default_database - else self.config.default_db, + yield from self.gen_lineage_from_query( + query=entry.query, + default_database=entry.default_database, + timestamp=entry.timestamp, + user=entry.user, + is_view_ddl=False, ) - if result.debug_info.table_error: - logger.debug( - f"Error parsing table lineage, {result.debug_info.table_error}" - ) - self.report.num_table_parse_failures += 1 - continue + def gen_lineage_from_query( + self, + query: str, + default_database: Optional[str] = None, + timestamp: Optional[datetime] = None, + user: Optional[str] = None, + is_view_ddl: bool = False, + ) -> Iterable[MetadataWorkUnit]: + result = sqlglot_lineage( + sql=query, + schema_resolver=self.schema_resolver, + default_db=None, + default_schema=default_database + if default_database + else self.config.default_db, + ) + if result.debug_info.table_error: + logger.debug( + f"Error parsing table lineage, {result.debug_info.table_error}" + ) + self.report.num_table_parse_failures += 1 + else: yield from self.builder.process_sql_parsing_result( result, - query=entry.query, - query_timestamp=entry.timestamp, - user=f"urn:li:corpuser:{entry.user}", - include_urns=self.urns, + query=query, + is_view_ddl=is_view_ddl, + query_timestamp=timestamp, + user=f"urn:li:corpuser:{user}", + include_urns=self.schema_resolver.get_urns(), ) def get_metadata_engine(self) -> Engine: @@ -221,8 +247,34 @@ def get_metadata_engine(self) -> Engine: return create_engine(url, **self.config.options) def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: - yield from super().get_workunits_internal() + # Add all schemas to the schema resolver + for wu in super().get_workunits_internal(): + if isinstance(wu.metadata, MetadataChangeEventClass): + if wu.metadata.proposedSnapshot: + for aspect in wu.metadata.proposedSnapshot.aspects: + if isinstance(aspect, SchemaMetadataClass): + self.schema_resolver.add_schema_metadata( + wu.metadata.proposedSnapshot.urn, + aspect, + ) + break + if isinstance(wu.metadata, MetadataChangeProposalWrapper): + if ( + wu.metadata.entityUrn + and isinstance(wu.metadata.aspect, ViewPropertiesClass) + and wu.metadata.aspect.viewLogic + ): + self._view_definition_cache[ + wu.metadata.entityUrn + ] = wu.metadata.aspect.viewLogic + yield wu + + if self.config.include_view_lineage: + self.report.report_ingestion_stage_start("view lineage extraction") + yield from self.get_view_lineage() + if self.config.include_table_lineage or self.config.include_usage_statistics: self.report.report_ingestion_stage_start("audit log extraction") yield from self.get_audit_log_mcps() - yield from self.builder.gen_workunits() + + yield from self.builder.gen_workunits() From 
c2e8041d771db1a20889255372312791fb6d911c Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Fri, 13 Oct 2023 22:59:18 +0200 Subject: [PATCH 48/98] Adding missing sqlparser libs to setup.py (#9015) --- metadata-ingestion/setup.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 3ea9a2ea61d74..545cafca9d4df 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -373,7 +373,10 @@ # FIXME: I don't think tableau uses sqllineage anymore so we should be able # to remove that dependency. "tableau": {"tableauserverclient>=0.17.0"} | sqllineage_lib | sqlglot_lib, - "teradata": sql_common | {"teradatasqlalchemy>=17.20.0.0"}, + "teradata": sql_common + | usage_common + | sqlglot_lib + | {"teradatasqlalchemy>=17.20.0.0"}, "trino": sql_common | trino, "starburst-trino-usage": sql_common | usage_common | trino, "nifi": {"requests", "packaging", "requests-gssapi"}, @@ -432,9 +435,7 @@ deepdiff_dep = "deepdiff" test_api_requirements = {pytest_dep, deepdiff_dep, "PyYAML"} -debug_requirements = { - "memray" -} +debug_requirements = {"memray"} base_dev_requirements = { *base_requirements, From 78b342f441b340189e4eab60574daa60074457e0 Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Fri, 13 Oct 2023 19:04:44 -0300 Subject: [PATCH 49/98] feat(graphql): support filtering based on greater than/less than criteria (#9001) Co-authored-by: Indy Prentice --- .../src/main/resources/search.graphql | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 4cabdb04afe77..e0cde5a2db9f9 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -458,6 +458,26 @@ enum FilterOperator { Represents the relation: The field exists. If the field is an array, the field is either not present or empty. """ EXISTS + + """ + Represent the relation greater than, e.g. ownerCount > 5 + """ + GREATER_THAN + + """ + Represent the relation greater than or equal to, e.g. ownerCount >= 5 + """ + GREATER_THAN_OR_EQUAL_TO + + """ + Represent the relation less than, e.g. ownerCount < 3 + """ + LESS_THAN + + """ + Represent the relation less than or equal to, e.g. 
ownerCount <= 3 + """ + LESS_THAN_OR_EQUAL_TO } """ From c81a339bfc3a57161e433c64bd331ca6af4f6f2d Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Mon, 16 Oct 2023 21:57:57 +0530 Subject: [PATCH 50/98] build(ingest): remove ratelimiter dependency (#9008) --- metadata-ingestion/setup.py | 1 - .../bigquery_v2/bigquery_audit_log_api.py | 2 +- .../src/datahub/utilities/ratelimiter.py | 56 +++++++++++++++++++ .../tests/unit/utilities/test_ratelimiter.py | 20 +++++++ 4 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 metadata-ingestion/src/datahub/utilities/ratelimiter.py create mode 100644 metadata-ingestion/tests/unit/utilities/test_ratelimiter.py diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 545cafca9d4df..1f4f0a0bad9b2 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -38,7 +38,6 @@ "progressbar2", "termcolor>=1.0.0", "psutil>=5.8.0", - "ratelimiter", "Deprecated", "humanfriendly", "packaging", diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py index 03b12c61ee5c6..db552c09cd0a7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py @@ -4,7 +4,6 @@ from google.cloud import bigquery from google.cloud.logging_v2.client import Client as GCPLoggingClient -from ratelimiter import RateLimiter from datahub.ingestion.source.bigquery_v2.bigquery_audit import ( AuditLogEntry, @@ -17,6 +16,7 @@ BQ_DATE_SHARD_FORMAT, BQ_DATETIME_FORMAT, ) +from datahub.utilities.ratelimiter import RateLimiter logger: logging.Logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/utilities/ratelimiter.py b/metadata-ingestion/src/datahub/utilities/ratelimiter.py new file mode 100644 index 0000000000000..3d47d25e14c49 --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/ratelimiter.py @@ -0,0 +1,56 @@ +import collections +import threading +import time +from contextlib import AbstractContextManager +from typing import Any, Deque + + +# Modified version of https://github.com/RazerM/ratelimiter/blob/master/ratelimiter/_sync.py +class RateLimiter(AbstractContextManager): + + """Provides rate limiting for an operation with a configurable number of + requests for a time period. + """ + + def __init__(self, max_calls: int, period: float = 1.0) -> None: + """Initialize a RateLimiter object which enforces as much as max_calls + operations on period (eventually floating) number of seconds. + """ + if period <= 0: + raise ValueError("Rate limiting period should be > 0") + if max_calls <= 0: + raise ValueError("Rate limiting number of calls should be > 0") + + # We're using a deque to store the last execution timestamps, not for + # its maxlen attribute, but to allow constant time front removal. + self.calls: Deque = collections.deque() + + self.period = period + self.max_calls = max_calls + self._lock = threading.Lock() + + def __enter__(self) -> "RateLimiter": + with self._lock: + # We want to ensure that no more than max_calls were run in the allowed + # period. For this, we store the last timestamps of each call and run + # the rate verification upon each __enter__ call. 
+ if len(self.calls) >= self.max_calls: + until = time.time() + self.period - self._timespan + sleeptime = until - time.time() + if sleeptime > 0: + time.sleep(sleeptime) + return self + + def __exit__(self, exc_type: Any, exc: Any, traceback: Any) -> None: + with self._lock: + # Store the last operation timestamp. + self.calls.append(time.time()) + + # Pop the timestamp list front (ie: the older calls) until the sum goes + # back below the period. This is our 'sliding period' window. + while self._timespan >= self.period: + self.calls.popleft() + + @property + def _timespan(self) -> float: + return self.calls[-1] - self.calls[0] diff --git a/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py b/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py new file mode 100644 index 0000000000000..0384e1f918881 --- /dev/null +++ b/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py @@ -0,0 +1,20 @@ +from collections import defaultdict +from datetime import datetime +from typing import Dict + +from datahub.utilities.ratelimiter import RateLimiter + + +def test_rate_is_limited(): + MAX_CALLS_PER_SEC = 5 + TOTAL_CALLS = 18 + actual_calls: Dict[float, int] = defaultdict(lambda: 0) + + ratelimiter = RateLimiter(max_calls=MAX_CALLS_PER_SEC, period=1) + for _ in range(TOTAL_CALLS): + with ratelimiter: + actual_calls[datetime.now().replace(microsecond=0).timestamp()] += 1 + + assert len(actual_calls) == round(TOTAL_CALLS / MAX_CALLS_PER_SEC) + assert all(calls <= MAX_CALLS_PER_SEC for calls in actual_calls.values()) + assert sum(actual_calls.values()) == TOTAL_CALLS From 9ccd1d4f5da8f3c93cb9aaacdb5de66600c99c99 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Mon, 16 Oct 2023 14:34:15 -0400 Subject: [PATCH 51/98] build(ingest/redshift): Add sqlglot dependency (#9021) --- metadata-ingestion/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 1f4f0a0bad9b2..7be565d51260d 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -353,7 +353,7 @@ | {"psycopg2-binary", "pymysql>=1.0.2"}, "pulsar": {"requests"}, "redash": {"redash-toolbelt", "sql-metadata"} | sqllineage_lib, - "redshift": sql_common | redshift_common | usage_common | {"redshift-connector"}, + "redshift": sql_common | redshift_common | usage_common | sqlglot_lib | {"redshift-connector"}, "redshift-legacy": sql_common | redshift_common, "redshift-usage-legacy": sql_common | usage_common | redshift_common, "s3": {*s3_base, *data_lake_profiling}, From 6366b63e48d37de883af61fb801632e9a43d6e48 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Mon, 16 Oct 2023 19:13:23 -0400 Subject: [PATCH 52/98] feat(ingest/teradata): Add option to not use file backed dict for view definitions (#9024) --- .../datahub/ingestion/source/sql/teradata.py | 47 ++++++++----------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py index 6080cf7b371e3..e628e4dbd3446 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py @@ -1,7 +1,7 @@ import logging from dataclasses import dataclass from datetime import datetime -from typing import Iterable, Optional, Set, Union +from typing import Iterable, MutableMapping, Optional, Union # This import verifies that the dependencies are available. 
import teradatasqlalchemy # noqa: F401 @@ -12,7 +12,6 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.time_window_config import BaseTimeWindowConfig -from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.sql_parsing_builder import SqlParsingBuilder from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( @@ -34,11 +33,7 @@ from datahub.ingestion.source.usage.usage_common import BaseUsageConfig from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport -from datahub.metadata._schema_classes import ( - MetadataChangeEventClass, - SchemaMetadataClass, - ViewPropertiesClass, -) +from datahub.metadata._schema_classes import SchemaMetadataClass, ViewPropertiesClass from datahub.metadata.com.linkedin.pegasus2avro.schema import ( BytesTypeClass, TimeTypeClass, @@ -112,6 +107,11 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig): description="Generate usage statistic.", ) + use_file_backed_cache: bool = Field( + default=True, + description="Whether to use a file backed cache for the view definitions.", + ) + @platform_name("Teradata") @config_class(TeradataConfig) @@ -142,7 +142,8 @@ class TeradataSource(TwoTierSQLAlchemySource): and "timestamp" >= TIMESTAMP '{start_time}' and "timestamp" < TIMESTAMP '{end_time}' """ - urns: Optional[Set[str]] + + _view_definition_cache: MutableMapping[str, str] def __init__(self, config: TeradataConfig, ctx: PipelineContext): super().__init__(config, ctx, "teradata") @@ -166,7 +167,10 @@ def __init__(self, config: TeradataConfig, ctx: PipelineContext): env=self.config.env, ) - self._view_definition_cache: FileBackedDict[str] = FileBackedDict() + if self.config.use_file_backed_cache: + self._view_definition_cache = FileBackedDict[str]() + else: + self._view_definition_cache = {} @classmethod def create(cls, config_dict, ctx): @@ -249,24 +253,13 @@ def get_metadata_engine(self) -> Engine: def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: # Add all schemas to the schema resolver for wu in super().get_workunits_internal(): - if isinstance(wu.metadata, MetadataChangeEventClass): - if wu.metadata.proposedSnapshot: - for aspect in wu.metadata.proposedSnapshot.aspects: - if isinstance(aspect, SchemaMetadataClass): - self.schema_resolver.add_schema_metadata( - wu.metadata.proposedSnapshot.urn, - aspect, - ) - break - if isinstance(wu.metadata, MetadataChangeProposalWrapper): - if ( - wu.metadata.entityUrn - and isinstance(wu.metadata.aspect, ViewPropertiesClass) - and wu.metadata.aspect.viewLogic - ): - self._view_definition_cache[ - wu.metadata.entityUrn - ] = wu.metadata.aspect.viewLogic + urn = wu.get_urn() + schema_metadata = wu.get_aspect_of_type(SchemaMetadataClass) + if schema_metadata: + self.schema_resolver.add_schema_metadata(urn, schema_metadata) + view_properties = wu.get_aspect_of_type(ViewPropertiesClass) + if view_properties and self.config.include_view_lineage: + self._view_definition_cache[urn] = view_properties.viewLogic yield wu if self.config.include_view_lineage: From 9fec6024fb177a321860e49f3c9977b41bb9e65f Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 17 Oct 2023 09:58:38 -0400 Subject: [PATCH 53/98] feat(ingest/unity-catalog): Support external S3 lineage (#9025) --- .../datahub/ingestion/source/aws/s3_util.py | 11 +++++-- .../source/snowflake/snowflake_lineage_v2.py | 6 ++-- 
.../datahub/ingestion/source/unity/config.py | 8 +++++ .../datahub/ingestion/source/unity/proxy.py | 8 +++++ .../ingestion/source/unity/proxy_types.py | 31 +++++++++++++++++++ .../datahub/ingestion/source/unity/report.py | 2 ++ .../datahub/ingestion/source/unity/source.py | 23 ++++++++++++++ 7 files changed, 84 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py index 501162455cc45..878b8dd1bb9a5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py @@ -34,21 +34,26 @@ def get_bucket_relative_path(s3_uri: str) -> str: return "/".join(strip_s3_prefix(s3_uri).split("/")[1:]) -def make_s3_urn(s3_uri: str, env: str) -> str: +def make_s3_urn(s3_uri: str, env: str, remove_extension: bool = True) -> str: s3_name = strip_s3_prefix(s3_uri) if s3_name.endswith("/"): s3_name = s3_name[:-1] name, extension = os.path.splitext(s3_name) - - if extension != "": + if remove_extension and extension != "": extension = extension[1:] # remove the dot return f"urn:li:dataset:(urn:li:dataPlatform:s3,{name}_{extension},{env})" return f"urn:li:dataset:(urn:li:dataPlatform:s3,{s3_name},{env})" +def make_s3_urn_for_lineage(s3_uri: str, env: str) -> str: + # Ideally this is the implementation for all S3 URNs + # Don't feel comfortable changing `make_s3_urn` for glue, sagemaker, and athena + return make_s3_urn(s3_uri, env, remove_extension=False) + + def get_bucket_name(s3_uri: str) -> str: if not is_s3_uri(s3_uri): raise ValueError( diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index 9a993f5774032..0a15c352fc842 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -21,7 +21,7 @@ import datahub.emitter.mce_builder as builder from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.aws.s3_util import make_s3_urn +from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage from datahub.ingestion.source.snowflake.constants import ( LINEAGE_PERMISSION_ERROR, SnowflakeEdition, @@ -652,7 +652,9 @@ def get_external_upstreams(self, external_lineage: Set[str]) -> List[UpstreamCla # For now, populate only for S3 if external_lineage_entry.startswith("s3://"): external_upstream_table = UpstreamClass( - dataset=make_s3_urn(external_lineage_entry, self.config.env), + dataset=make_s3_urn_for_lineage( + external_lineage_entry, self.config.env + ), type=DatasetLineageTypeClass.COPY, ) external_upstreams.append(external_upstream_table) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index a57ee39848855..16820c37d546e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -166,6 +166,14 @@ class UnityCatalogSourceConfig( description="Option to enable/disable lineage generation.", ) + include_external_lineage: bool = pydantic.Field( + default=True, + description=( + "Option to enable/disable lineage generation for external tables." 
+ " Only external S3 tables are supported at the moment." + ), + ) + include_notebooks: bool = pydantic.Field( default=False, description="Ingest notebooks, represented as DataHub datasets.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index 9bcdb200f180e..3fb77ce512ed2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -33,6 +33,7 @@ ALLOWED_STATEMENT_TYPES, Catalog, Column, + ExternalTableReference, Metastore, Notebook, Query, @@ -248,6 +249,13 @@ def table_lineage(self, table: Table, include_entity_lineage: bool) -> None: ) if table_ref: table.upstreams[table_ref] = {} + elif "fileInfo" in item: + external_ref = ExternalTableReference.create_from_lineage( + item["fileInfo"] + ) + if external_ref: + table.external_upstreams.add(external_ref) + for notebook in item.get("notebookInfos") or []: table.upstream_notebooks.add(notebook["notebook_id"]) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py index 18ac2475b51e0..315c1c0d20186 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py @@ -10,6 +10,7 @@ CatalogType, ColumnTypeName, DataSourceFormat, + SecurableType, TableType, ) from databricks.sdk.service.sql import QueryStatementType @@ -176,6 +177,35 @@ def external_path(self) -> str: return f"{self.catalog}/{self.schema}/{self.table}" +@dataclass(frozen=True, order=True) +class ExternalTableReference: + path: str + has_permission: bool + name: Optional[str] + type: Optional[SecurableType] + storage_location: Optional[str] + + @classmethod + def create_from_lineage(cls, d: dict) -> Optional["ExternalTableReference"]: + try: + securable_type: Optional[SecurableType] + try: + securable_type = SecurableType(d.get("securable_type", "").lower()) + except ValueError: + securable_type = None + + return cls( + path=d["path"], + has_permission=d.get("has_permission") or True, + name=d.get("securable_name"), + type=securable_type, + storage_location=d.get("storage_location"), + ) + except Exception as e: + logger.warning(f"Failed to create ExternalTableReference from {d}: {e}") + return None + + @dataclass class Table(CommonProperty): schema: Schema @@ -193,6 +223,7 @@ class Table(CommonProperty): view_definition: Optional[str] properties: Dict[str, str] upstreams: Dict[TableReference, Dict[str, List[str]]] = field(default_factory=dict) + external_upstreams: Set[ExternalTableReference] = field(default_factory=set) upstream_notebooks: Set[NotebookId] = field(default_factory=set) downstream_notebooks: Set[NotebookId] = field(default_factory=set) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py index fa61571fa92cb..4153d9dd88eb8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py @@ -19,6 +19,8 @@ class UnityCatalogReport(IngestionStageReport, StaleEntityRemovalSourceReport): notebooks: EntityFilterReport = EntityFilterReport.field(type="notebook") num_column_lineage_skipped_column_count: int = 0 + num_external_upstreams_lacking_permissions: int = 0 + num_external_upstreams_unsupported: int = 0 num_queries: int = 0 
num_queries_dropped_parse_failure: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 27c1f341aa84d..b63cf65d55dc8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -41,6 +41,7 @@ TestConnectionReport, ) from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage from datahub.ingestion.source.common.subtypes import ( DatasetContainerSubTypes, DatasetSubTypes, @@ -455,6 +456,28 @@ def _generate_lineage_aspect( ) ) + if self.config.include_external_lineage: + for external_ref in table.external_upstreams: + if not external_ref.has_permission or not external_ref.path: + self.report.num_external_upstreams_lacking_permissions += 1 + logger.warning( + f"Lacking permissions for external file upstream on {table.ref}" + ) + elif external_ref.path.startswith("s3://"): + upstreams.append( + UpstreamClass( + dataset=make_s3_urn_for_lineage( + external_ref.path, self.config.env + ), + type=DatasetLineageTypeClass.COPY, + ) + ) + else: + self.report.num_external_upstreams_unsupported += 1 + logger.warning( + f"Unsupported external file upstream on {table.ref}: {external_ref.path}" + ) + if upstreams: return UpstreamLineageClass( upstreams=upstreams, From 10eb205cb8d455639c6d09dcc0c8f3853264f96f Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Tue, 17 Oct 2023 16:16:25 +0200 Subject: [PATCH 54/98] fix(ingest) - Fix file backed collection temp directory removal (#9027) --- .../src/datahub/utilities/file_backed_collections.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py index c04d2138bc116..18493edded4b7 100644 --- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py +++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py @@ -3,6 +3,7 @@ import logging import pathlib import pickle +import shutil import sqlite3 import tempfile from dataclasses import dataclass, field @@ -56,15 +57,15 @@ class ConnectionWrapper: conn: sqlite3.Connection filename: pathlib.Path - _temp_directory: Optional[tempfile.TemporaryDirectory] + _temp_directory: Optional[str] def __init__(self, filename: Optional[pathlib.Path] = None): self._temp_directory = None # Warning: If filename is provided, the file will not be automatically cleaned up. 
if not filename: - self._temp_directory = tempfile.TemporaryDirectory() - filename = pathlib.Path(self._temp_directory.name) / _DEFAULT_FILE_NAME + self._temp_directory = tempfile.mkdtemp() + filename = pathlib.Path(self._temp_directory) / _DEFAULT_FILE_NAME self.conn = sqlite3.connect(filename, isolation_level=None) self.conn.row_factory = sqlite3.Row @@ -101,7 +102,8 @@ def executemany( def close(self) -> None: self.conn.close() if self._temp_directory: - self._temp_directory.cleanup() + shutil.rmtree(self._temp_directory) + self._temp_directory = None def __enter__(self) -> "ConnectionWrapper": return self From e7c662a0aca0be97e34bec55161766ea84036ced Mon Sep 17 00:00:00 2001 From: ethan-cartwright Date: Tue, 17 Oct 2023 10:54:07 -0400 Subject: [PATCH 55/98] add dependency level to scrollAcrossLineage search results (#9016) --- datahub-web-react/src/graphql/scroll.graphql | 1 + 1 file changed, 1 insertion(+) diff --git a/datahub-web-react/src/graphql/scroll.graphql b/datahub-web-react/src/graphql/scroll.graphql index 18274c50c2166..1031fed7b9e13 100644 --- a/datahub-web-react/src/graphql/scroll.graphql +++ b/datahub-web-react/src/graphql/scroll.graphql @@ -408,6 +408,7 @@ fragment downloadScrollAcrossLineageResult on ScrollAcrossLineageResults { count total searchResults { + degree entity { ...downloadSearchResults } From ae5fd90c73ff29e00f4b8e20735ce0b72e7b823b Mon Sep 17 00:00:00 2001 From: ethan-cartwright Date: Tue, 17 Oct 2023 10:55:07 -0400 Subject: [PATCH 56/98] add create dataproduct example (#9009) --- .../examples/library/create_dataproduct.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 metadata-ingestion/examples/library/create_dataproduct.py diff --git a/metadata-ingestion/examples/library/create_dataproduct.py b/metadata-ingestion/examples/library/create_dataproduct.py new file mode 100644 index 0000000000000..245395b602480 --- /dev/null +++ b/metadata-ingestion/examples/library/create_dataproduct.py @@ -0,0 +1,25 @@ +from datahub.api.entities.dataproduct.dataproduct import DataProduct +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph + +gms_endpoint = "http://localhost:8080" +graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint)) + +data_product = DataProduct( + id="pet_of_the_week", + display_name="Pet of the Week Campagin", + domain="urn:li:domain:ef39e99a-9d61-406d-b4a8-c70b16380206", + description="This campaign includes Pet of the Week data.", + assets=[ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.pet_details,PROD)", + "urn:li:dashboard:(looker,baz)", + "urn:li:dataFlow:(airflow,dag_abc,PROD)", + ], + owners=[{"id": "urn:li:corpuser:jdoe", "type": "BUSINESS_OWNER"}], + terms=["urn:li:glossaryTerm:ClientsAndAccounts.AccountBalance"], + tags=["urn:li:tag:adoption"], + properties={"lifecycle": "production", "sla": "7am every day"}, + external_url="https://en.wikipedia.org/wiki/Sloth", +) + +for mcp in data_product.generate_mcp(upsert=False): + graph.emit(mcp) From 75108ceb2ff125af52fb1e37f7f6d371a77de3b7 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Tue, 17 Oct 2023 14:13:31 -0400 Subject: [PATCH 57/98] Download Lineage Results Cypress Test (#9017) --- .../styled/search/DownloadAsCsvModal.tsx | 2 + .../styled/search/SearchExtendedMenu.tsx | 4 +- .../e2e/lineage/download_lineage_results.js | 80 +++++++++++++++++++ 3 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 
smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/DownloadAsCsvModal.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/DownloadAsCsvModal.tsx index 452658583cf61..92e859ee1b329 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/DownloadAsCsvModal.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/DownloadAsCsvModal.tsx @@ -130,6 +130,7 @@ export default function DownloadAsCsvModal({ Close -
{`${logs}${!showExpandedLogs && isOutputExpandable ? '...' : ''}`}
- {isOutputExpandable && ( +
{`${logs}${!showExpandedLogs && areLogsExpandable ? '...' : ''}`}
+ {areLogsExpandable && ( setShowExpandedLogs(!showExpandedLogs)}> {showExpandedLogs ? 'Hide' : 'Show More'} )}
+ {recipe && ( + + Recipe + + + The recipe used for this ingestion run. + + + +
{`${recipe}${!showExpandedRecipe && isRecipeExpandable ? '\n...' : ''}`}
+
+ {isRecipeExpandable && ( + setShowExpandedRecipe((v) => !v)}> + {showExpandedRecipe ? 'Hide' : 'Show More'} + + )} +
+ )}
); diff --git a/datahub-web-react/src/app/ingest/source/utils.ts b/datahub-web-react/src/app/ingest/source/utils.ts index c372388e958b7..f789ed8434721 100644 --- a/datahub-web-react/src/app/ingest/source/utils.ts +++ b/datahub-web-react/src/app/ingest/source/utils.ts @@ -1,17 +1,19 @@ -import YAML from 'yamljs'; import { CheckCircleOutlined, ClockCircleOutlined, CloseCircleOutlined, + ExclamationCircleOutlined, LoadingOutlined, + StopOutlined, WarningOutlined, } from '@ant-design/icons'; -import { ANTD_GRAY, REDESIGN_COLORS } from '../../entity/shared/constants'; +import YAML from 'yamljs'; +import { ListIngestionSourcesDocument, ListIngestionSourcesQuery } from '../../../graphql/ingestion.generated'; import { EntityType, FacetMetadata } from '../../../types.generated'; -import { capitalizeFirstLetterOnly, pluralize } from '../../shared/textUtil'; import EntityRegistry from '../../entity/EntityRegistry'; +import { ANTD_GRAY, REDESIGN_COLORS } from '../../entity/shared/constants'; +import { capitalizeFirstLetterOnly, pluralize } from '../../shared/textUtil'; import { SourceConfig } from './builder/types'; -import { ListIngestionSourcesDocument, ListIngestionSourcesQuery } from '../../../graphql/ingestion.generated'; export const getSourceConfigs = (ingestionSources: SourceConfig[], sourceType: string) => { const sourceConfigs = ingestionSources.find((source) => source.name === sourceType); @@ -40,7 +42,9 @@ export function getPlaceholderRecipe(ingestionSources: SourceConfig[], type?: st export const RUNNING = 'RUNNING'; export const SUCCESS = 'SUCCESS'; +export const WARNING = 'WARNING'; export const FAILURE = 'FAILURE'; +export const CONNECTION_FAILURE = 'CONNECTION_FAILURE'; export const CANCELLED = 'CANCELLED'; export const UP_FOR_RETRY = 'UP_FOR_RETRY'; export const ROLLING_BACK = 'ROLLING_BACK'; @@ -56,8 +60,10 @@ export const getExecutionRequestStatusIcon = (status: string) => { return ( (status === RUNNING && LoadingOutlined) || (status === SUCCESS && CheckCircleOutlined) || + (status === WARNING && ExclamationCircleOutlined) || (status === FAILURE && CloseCircleOutlined) || - (status === CANCELLED && CloseCircleOutlined) || + (status === CONNECTION_FAILURE && CloseCircleOutlined) || + (status === CANCELLED && StopOutlined) || (status === UP_FOR_RETRY && ClockCircleOutlined) || (status === ROLLED_BACK && WarningOutlined) || (status === ROLLING_BACK && LoadingOutlined) || @@ -70,7 +76,9 @@ export const getExecutionRequestStatusDisplayText = (status: string) => { return ( (status === RUNNING && 'Running') || (status === SUCCESS && 'Succeeded') || + (status === WARNING && 'Completed') || (status === FAILURE && 'Failed') || + (status === CONNECTION_FAILURE && 'Connection Failed') || (status === CANCELLED && 'Cancelled') || (status === UP_FOR_RETRY && 'Up for Retry') || (status === ROLLED_BACK && 'Rolled Back') || @@ -83,21 +91,25 @@ export const getExecutionRequestStatusDisplayText = (status: string) => { export const getExecutionRequestSummaryText = (status: string) => { switch (status) { case RUNNING: - return 'Ingestion is running'; + return 'Ingestion is running...'; case SUCCESS: - return 'Ingestion successfully completed'; + return 'Ingestion succeeded with no errors or suspected missing data.'; + case WARNING: + return 'Ingestion completed with minor or intermittent errors.'; case FAILURE: - return 'Ingestion completed with errors'; + return 'Ingestion failed to complete, or completed with serious errors.'; + case CONNECTION_FAILURE: + return 'Ingestion failed due to network, 
authentication, or permission issues.'; case CANCELLED: - return 'Ingestion was cancelled'; + return 'Ingestion was cancelled.'; case ROLLED_BACK: - return 'Ingestion was rolled back'; + return 'Ingestion was rolled back.'; case ROLLING_BACK: - return 'Ingestion is in the process of rolling back'; + return 'Ingestion is in the process of rolling back.'; case ROLLBACK_FAILED: - return 'Ingestion rollback failed'; + return 'Ingestion rollback failed.'; default: - return 'Ingestion status not recognized'; + return 'Ingestion status not recognized.'; } }; @@ -105,7 +117,9 @@ export const getExecutionRequestStatusDisplayColor = (status: string) => { return ( (status === RUNNING && REDESIGN_COLORS.BLUE) || (status === SUCCESS && 'green') || + (status === WARNING && 'orangered') || (status === FAILURE && 'red') || + (status === CONNECTION_FAILURE && 'crimson') || (status === UP_FOR_RETRY && 'orange') || (status === CANCELLED && ANTD_GRAY[9]) || (status === ROLLED_BACK && 'orange') || diff --git a/datahub-web-react/src/graphql/ingestion.graphql b/datahub-web-react/src/graphql/ingestion.graphql index 80f66642fe11f..c127e9ec03f9a 100644 --- a/datahub-web-react/src/graphql/ingestion.graphql +++ b/datahub-web-react/src/graphql/ingestion.graphql @@ -90,6 +90,10 @@ query getIngestionExecutionRequest($urn: String!) { source { type } + arguments { + key + value + } } result { status From 1b737243b266843136918ec92f6d20573b999272 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Wed, 18 Oct 2023 13:45:46 -0500 Subject: [PATCH 68/98] feat(avro): upgrade avro to 1.11 (#9031) --- build.gradle | 7 +++---- buildSrc/build.gradle | 9 ++++++++- docker/datahub-frontend/start.sh | 1 + metadata-dao-impl/kafka-producer/build.gradle | 4 ++-- metadata-events/{mxe-avro-1.7 => mxe-avro}/.gitignore | 0 metadata-events/{mxe-avro-1.7 => mxe-avro}/build.gradle | 6 +++--- metadata-events/mxe-registration/build.gradle | 2 +- metadata-events/mxe-schemas/build.gradle | 2 +- .../{mxe-utils-avro-1.7 => mxe-utils-avro}/.gitignore | 0 .../{mxe-utils-avro-1.7 => mxe-utils-avro}/build.gradle | 2 +- .../src/main/java/com/linkedin/metadata/EventUtils.java | 0 .../test/java/com/linkedin/metadata/EventUtilsTests.java | 0 .../src/test/resources/test-avro2pegasus-mae.json | 0 .../src/test/resources/test-avro2pegasus-mce.json | 0 .../src/test/resources/test-pegasus2avro-fmce.json | 0 .../src/test/resources/test-pegasus2avro-mae.json | 0 .../src/test/resources/test-pegasus2avro-mce.json | 0 metadata-integration/java/datahub-client/build.gradle | 2 +- .../main/java/datahub/client/kafka/AvroSerializer.java | 4 +++- metadata-io/build.gradle | 4 ++-- metadata-jobs/mae-consumer/build.gradle | 4 ++-- metadata-jobs/mce-consumer/build.gradle | 4 ++-- metadata-jobs/pe-consumer/build.gradle | 4 ++-- metadata-service/restli-servlet-impl/build.gradle | 2 +- metadata-service/services/build.gradle | 4 ++-- metadata-utils/build.gradle | 6 +++--- settings.gradle | 4 ++-- 27 files changed, 40 insertions(+), 31 deletions(-) rename metadata-events/{mxe-avro-1.7 => mxe-avro}/.gitignore (100%) rename metadata-events/{mxe-avro-1.7 => mxe-avro}/build.gradle (81%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/.gitignore (100%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/build.gradle (95%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/main/java/com/linkedin/metadata/EventUtils.java (100%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/test/java/com/linkedin/metadata/EventUtilsTests.java (100%) 
rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/test/resources/test-avro2pegasus-mae.json (100%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/test/resources/test-avro2pegasus-mce.json (100%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/test/resources/test-pegasus2avro-fmce.json (100%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/test/resources/test-pegasus2avro-mae.json (100%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/test/resources/test-pegasus2avro-mce.json (100%) diff --git a/build.gradle b/build.gradle index 025c588da2b52..cf55a59cfe694 100644 --- a/build.gradle +++ b/build.gradle @@ -27,7 +27,7 @@ buildscript { dependencies { classpath 'com.linkedin.pegasus:gradle-plugins:' + pegasusVersion classpath 'com.github.node-gradle:gradle-node-plugin:2.2.4' - classpath 'io.acryl.gradle.plugin:gradle-avro-plugin:0.8.1' + classpath 'io.acryl.gradle.plugin:gradle-avro-plugin:0.2.0' classpath 'org.springframework.boot:spring-boot-gradle-plugin:' + springBootVersion classpath "io.codearte.gradle.nexus:gradle-nexus-staging-plugin:0.30.0" classpath "com.palantir.gradle.gitversion:gradle-git-version:3.0.0" @@ -67,8 +67,8 @@ project.ext.externalDependency = [ 'antlr4Runtime': 'org.antlr:antlr4-runtime:4.7.2', 'antlr4': 'org.antlr:antlr4:4.7.2', 'assertJ': 'org.assertj:assertj-core:3.11.1', - 'avro_1_7': 'org.apache.avro:avro:1.7.7', - 'avroCompiler_1_7': 'org.apache.avro:avro-compiler:1.7.7', + 'avro': 'org.apache.avro:avro:1.11.3', + 'avroCompiler': 'org.apache.avro:avro-compiler:1.11.3', 'awsGlueSchemaRegistrySerde': 'software.amazon.glue:schema-registry-serde:1.1.10', 'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:1.1.1', 'awsSecretsManagerJdbc': 'com.amazonaws.secretsmanager:aws-secretsmanager-jdbc:1.0.8', @@ -127,7 +127,6 @@ project.ext.externalDependency = [ 'jgrapht': 'org.jgrapht:jgrapht-core:1.5.1', 'jna': 'net.java.dev.jna:jna:5.12.1', 'jsonPatch': 'com.github.java-json-tools:json-patch:1.13', - 'jsonSchemaAvro': 'com.github.fge:json-schema-avro:0.1.4', 'jsonSimple': 'com.googlecode.json-simple:json-simple:1.1.1', 'jsonSmart': 'net.minidev:json-smart:2.4.9', 'json': 'org.json:json:20230227', diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index 65b3780431db9..1f9d30d520171 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -5,7 +5,14 @@ buildscript { } dependencies { - implementation('io.acryl:json-schema-avro:0.1.5') { + /** + * Forked version of abandoned repository: https://github.com/fge/json-schema-avro + * Maintainer last active 2014, we maintain an active fork of this repository to utilize mapping Avro schemas to Json Schemas, + * repository is as close to official library for this as you can get. Original maintainer is one of the authors of Json Schema spec. + * Other companies are also separately maintaining forks (like: https://github.com/java-json-tools/json-schema-avro). 
+ * We have built several customizations on top of it for various bug fixes, especially around union scheams + */ + implementation('io.acryl:json-schema-avro:0.2.2') { exclude group: 'com.fasterxml.jackson.core', module: 'jackson-databind' exclude group: 'com.google.guava', module: 'guava' } diff --git a/docker/datahub-frontend/start.sh b/docker/datahub-frontend/start.sh index 9dc1514144bb1..430982aa2456b 100755 --- a/docker/datahub-frontend/start.sh +++ b/docker/datahub-frontend/start.sh @@ -50,6 +50,7 @@ export JAVA_OPTS="-Xms512m \ -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf \ -Dlogback.configurationFile=datahub-frontend/conf/logback.xml \ -Dlogback.debug=false \ + -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005 \ ${PROMETHEUS_AGENT:-} ${OTEL_AGENT:-} \ ${TRUSTSTORE_FILE:-} ${TRUSTSTORE_TYPE:-} ${TRUSTSTORE_PASSWORD:-} \ ${HTTP_PROXY:-} ${HTTPS_PROXY:-} ${NO_PROXY:-} \ diff --git a/metadata-dao-impl/kafka-producer/build.gradle b/metadata-dao-impl/kafka-producer/build.gradle index 393b10b0e9d24..bc3415b2ccc8c 100644 --- a/metadata-dao-impl/kafka-producer/build.gradle +++ b/metadata-dao-impl/kafka-producer/build.gradle @@ -1,9 +1,9 @@ apply plugin: 'java' dependencies { - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation project(':entity-registry') implementation project(':metadata-io') diff --git a/metadata-events/mxe-avro-1.7/.gitignore b/metadata-events/mxe-avro/.gitignore similarity index 100% rename from metadata-events/mxe-avro-1.7/.gitignore rename to metadata-events/mxe-avro/.gitignore diff --git a/metadata-events/mxe-avro-1.7/build.gradle b/metadata-events/mxe-avro/build.gradle similarity index 81% rename from metadata-events/mxe-avro-1.7/build.gradle rename to metadata-events/mxe-avro/build.gradle index 8c0a26d22dc7d..9d11eeb160ff0 100644 --- a/metadata-events/mxe-avro-1.7/build.gradle +++ b/metadata-events/mxe-avro/build.gradle @@ -6,8 +6,8 @@ apply plugin: 'io.acryl.gradle.plugin.avro' apply plugin: 'java-library' dependencies { - api externalDependency.avro_1_7 - implementation(externalDependency.avroCompiler_1_7) { + api externalDependency.avro + implementation(externalDependency.avroCompiler) { exclude group: 'org.apache.velocity', module: 'velocity' } constraints { @@ -21,7 +21,7 @@ dependencies { def genDir = file("src/generated/java") -task avroCodeGen(type: com.commercehub.gradle.plugin.avro.GenerateAvroJavaTask, dependsOn: configurations.avsc) { +task avroCodeGen(type: com.github.davidmc24.gradle.plugin.avro.GenerateAvroJavaTask, dependsOn: configurations.avsc) { source("$rootDir/metadata-events/mxe-schemas/src/renamed/avro") outputDir = genDir dependsOn(':metadata-events:mxe-schemas:renameNamespace') diff --git a/metadata-events/mxe-registration/build.gradle b/metadata-events/mxe-registration/build.gradle index 60e0da59616d9..032870d93329f 100644 --- a/metadata-events/mxe-registration/build.gradle +++ b/metadata-events/mxe-registration/build.gradle @@ -5,7 +5,7 @@ configurations { } dependencies { - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-models') implementation spec.product.pegasus.dataAvro1_6 diff --git a/metadata-events/mxe-schemas/build.gradle 
b/metadata-events/mxe-schemas/build.gradle index fe46601fb68b7..8dc8b71bd1cd8 100644 --- a/metadata-events/mxe-schemas/build.gradle +++ b/metadata-events/mxe-schemas/build.gradle @@ -1,4 +1,4 @@ -apply plugin: 'java' +apply plugin: 'java-library' apply plugin: 'pegasus' dependencies { diff --git a/metadata-events/mxe-utils-avro-1.7/.gitignore b/metadata-events/mxe-utils-avro/.gitignore similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/.gitignore rename to metadata-events/mxe-utils-avro/.gitignore diff --git a/metadata-events/mxe-utils-avro-1.7/build.gradle b/metadata-events/mxe-utils-avro/build.gradle similarity index 95% rename from metadata-events/mxe-utils-avro-1.7/build.gradle rename to metadata-events/mxe-utils-avro/build.gradle index 3b137965d6c19..a7bf287ab224d 100644 --- a/metadata-events/mxe-utils-avro-1.7/build.gradle +++ b/metadata-events/mxe-utils-avro/build.gradle @@ -1,7 +1,7 @@ apply plugin: 'java-library' dependencies { - api project(':metadata-events:mxe-avro-1.7') + api project(':metadata-events:mxe-avro') api project(':metadata-models') api spec.product.pegasus.dataAvro1_6 diff --git a/metadata-events/mxe-utils-avro-1.7/src/main/java/com/linkedin/metadata/EventUtils.java b/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/main/java/com/linkedin/metadata/EventUtils.java rename to metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/java/com/linkedin/metadata/EventUtilsTests.java b/metadata-events/mxe-utils-avro/src/test/java/com/linkedin/metadata/EventUtilsTests.java similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/java/com/linkedin/metadata/EventUtilsTests.java rename to metadata-events/mxe-utils-avro/src/test/java/com/linkedin/metadata/EventUtilsTests.java diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mae.json b/metadata-events/mxe-utils-avro/src/test/resources/test-avro2pegasus-mae.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mae.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-avro2pegasus-mae.json diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mce.json b/metadata-events/mxe-utils-avro/src/test/resources/test-avro2pegasus-mce.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mce.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-avro2pegasus-mce.json diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-fmce.json b/metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-fmce.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-fmce.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-fmce.json diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mae.json b/metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-mae.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mae.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-mae.json diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mce.json 
b/metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-mce.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mce.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-mce.json diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index 95de3cdb3c526..e6210f1f073f6 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -30,7 +30,7 @@ dependencies { implementation(externalDependency.kafkaAvroSerializer) { exclude group: "org.apache.avro" } - implementation externalDependency.avro_1_7 + implementation externalDependency.avro constraints { implementation('commons-collections:commons-collections:3.2.2') { because 'Vulnerability Issue' diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/AvroSerializer.java b/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/AvroSerializer.java index ee0d459aaa7d3..6212e57470be4 100644 --- a/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/AvroSerializer.java +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/AvroSerializer.java @@ -16,12 +16,14 @@ class AvroSerializer { private final Schema _recordSchema; private final Schema _genericAspectSchema; + private final Schema _changeTypeEnumSchema; private final EventFormatter _eventFormatter; public AvroSerializer() throws IOException { _recordSchema = new Schema.Parser() .parse(this.getClass().getClassLoader().getResourceAsStream("MetadataChangeProposal.avsc")); _genericAspectSchema = this._recordSchema.getField("aspect").schema().getTypes().get(1); + _changeTypeEnumSchema = this._recordSchema.getField("changeType").schema(); _eventFormatter = new EventFormatter(EventFormatter.Format.PEGASUS_JSON); } @@ -43,7 +45,7 @@ public GenericRecord serialize(MetadataChangeProposal mcp) throws IOException { genericRecord.put("aspect", genericAspect); genericRecord.put("aspectName", mcp.getAspectName()); genericRecord.put("entityType", mcp.getEntityType()); - genericRecord.put("changeType", mcp.getChangeType()); + genericRecord.put("changeType", new GenericData.EnumSymbol(_changeTypeEnumSchema, mcp.getChangeType())); return genericRecord; } } \ No newline at end of file diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index ad54cf6524398..740fed61f13d5 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -8,9 +8,9 @@ configurations { dependencies { implementation project(':entity-registry') api project(':metadata-utils') - api project(':metadata-events:mxe-avro-1.7') + api project(':metadata-events:mxe-avro') api project(':metadata-events:mxe-registration') - api project(':metadata-events:mxe-utils-avro-1.7') + api project(':metadata-events:mxe-utils-avro') api project(':metadata-models') api project(':metadata-service:restli-client') api project(':metadata-service:configuration') diff --git a/metadata-jobs/mae-consumer/build.gradle b/metadata-jobs/mae-consumer/build.gradle index d36fd0de40d03..fcb8b62e4ac9d 100644 --- a/metadata-jobs/mae-consumer/build.gradle +++ b/metadata-jobs/mae-consumer/build.gradle @@ -21,9 +21,9 @@ dependencies { implementation project(':ingestion-scheduler') implementation project(':metadata-utils') implementation project(":entity-registry") - implementation project(':metadata-events:mxe-avro-1.7') + 
implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation project(':datahub-graphql-core') implementation externalDependency.elasticSearchRest diff --git a/metadata-jobs/mce-consumer/build.gradle b/metadata-jobs/mce-consumer/build.gradle index 0bca55e0e5f92..97eec9fcff051 100644 --- a/metadata-jobs/mce-consumer/build.gradle +++ b/metadata-jobs/mce-consumer/build.gradle @@ -17,9 +17,9 @@ dependencies { } implementation project(':metadata-utils') implementation project(':metadata-events:mxe-schemas') - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation project(':metadata-io') implementation project(':metadata-service:restli-client') implementation spec.product.pegasus.restliClient diff --git a/metadata-jobs/pe-consumer/build.gradle b/metadata-jobs/pe-consumer/build.gradle index 1899a4de15635..81e8b8c9971f0 100644 --- a/metadata-jobs/pe-consumer/build.gradle +++ b/metadata-jobs/pe-consumer/build.gradle @@ -10,9 +10,9 @@ configurations { dependencies { avro project(path: ':metadata-models', configuration: 'avroSchema') implementation project(':li-utils') - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation(project(':metadata-service:factories')) { exclude group: 'org.neo4j.test' } diff --git a/metadata-service/restli-servlet-impl/build.gradle b/metadata-service/restli-servlet-impl/build.gradle index cb307863748c3..de6fb6690e693 100644 --- a/metadata-service/restli-servlet-impl/build.gradle +++ b/metadata-service/restli-servlet-impl/build.gradle @@ -48,7 +48,7 @@ dependencies { implementation externalDependency.dropwizardMetricsCore implementation externalDependency.dropwizardMetricsJmx - compileOnly externalDependency.lombok + implementation externalDependency.lombok implementation externalDependency.neo4jJavaDriver implementation externalDependency.opentelemetryAnnotations diff --git a/metadata-service/services/build.gradle b/metadata-service/services/build.gradle index 22c62af324c12..b6af3d330d185 100644 --- a/metadata-service/services/build.gradle +++ b/metadata-service/services/build.gradle @@ -9,9 +9,9 @@ dependencies { implementation externalDependency.jsonPatch implementation project(':entity-registry') implementation project(':metadata-utils') - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation project(':metadata-models') implementation project(':metadata-service:restli-client') implementation project(':metadata-service:configuration') diff --git a/metadata-utils/build.gradle b/metadata-utils/build.gradle index 1c1c368611488..7bc6aa2d43442 100644 --- a/metadata-utils/build.gradle +++ b/metadata-utils/build.gradle @@ -1,7 +1,7 @@ apply 
plugin: 'java-library' dependencies { - api externalDependency.avro_1_7 + api externalDependency.avro implementation externalDependency.commonsLang api externalDependency.dropwizardMetricsCore implementation externalDependency.dropwizardMetricsJmx @@ -16,8 +16,8 @@ dependencies { api project(':li-utils') api project(':entity-registry') - api project(':metadata-events:mxe-avro-1.7') - api project(':metadata-events:mxe-utils-avro-1.7') + api project(':metadata-events:mxe-avro') + api project(':metadata-events:mxe-utils-avro') implementation externalDependency.slf4jApi compileOnly externalDependency.lombok diff --git a/settings.gradle b/settings.gradle index d6777b07b3fb3..52de461383b5e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -20,10 +20,10 @@ include 'metadata-service:openapi-analytics-servlet' include 'metadata-service:plugin' include 'metadata-service:plugin:src:test:sample-test-plugins' include 'metadata-dao-impl:kafka-producer' -include 'metadata-events:mxe-avro-1.7' +include 'metadata-events:mxe-avro' include 'metadata-events:mxe-registration' include 'metadata-events:mxe-schemas' -include 'metadata-events:mxe-utils-avro-1.7' +include 'metadata-events:mxe-utils-avro' include 'metadata-ingestion' include 'metadata-jobs:mae-consumer' include 'metadata-jobs:mce-consumer' From aae1347efce9edf1b5c4512ba3c72569e165947d Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Wed, 18 Oct 2023 16:26:24 -0300 Subject: [PATCH 69/98] fix(search): Detect field type for use in defining the sort order (#8992) Co-authored-by: Indy Prentice --- .../indexbuilder/MappingsBuilder.java | 48 +++++------- .../query/request/SearchRequestHandler.java | 8 +- .../metadata/search/utils/ESUtils.java | 74 ++++++++++++++++++- .../fixtures/SampleDataFixtureTestBase.java | 64 ++++++++++++++-- 4 files changed, 154 insertions(+), 40 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java index 004b2e0a2adc4..1edc77bbd214c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java @@ -5,6 +5,7 @@ import com.linkedin.metadata.models.SearchScoreFieldSpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation.FieldType; +import com.linkedin.metadata.search.utils.ESUtils; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -31,15 +32,6 @@ public static Map getPartialNgramConfigWithOverrides(Map KEYWORD_TYPE_MAP = ImmutableMap.of(TYPE, KEYWORD); - // Field Types - public static final String BOOLEAN = "boolean"; - public static final String DATE = "date"; - public static final String DOUBLE = "double"; - public static final String LONG = "long"; - public static final String OBJECT = "object"; - public static final String TEXT = "text"; - public static final String TOKEN_COUNT = "token_count"; - // Subfields public static final String DELIMITED = "delimited"; public static final String LENGTH = "length"; @@ -74,7 +66,7 @@ public static Map getMappings(@Nonnull final EntitySpec entitySp private static Map getMappingsForUrn() { Map subFields = new HashMap<>(); subFields.put(DELIMITED, ImmutableMap.of( - TYPE, TEXT, + TYPE, ESUtils.TEXT_FIELD_TYPE, ANALYZER, URN_ANALYZER, SEARCH_ANALYZER, 
URN_SEARCH_ANALYZER, SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER) @@ -85,13 +77,13 @@ private static Map getMappingsForUrn() { ) )); return ImmutableMap.builder() - .put(TYPE, KEYWORD) + .put(TYPE, ESUtils.KEYWORD_FIELD_TYPE) .put(FIELDS, subFields) .build(); } private static Map getMappingsForRunId() { - return ImmutableMap.builder().put(TYPE, KEYWORD).build(); + return ImmutableMap.builder().put(TYPE, ESUtils.KEYWORD_FIELD_TYPE).build(); } private static Map getMappingsForField(@Nonnull final SearchableFieldSpec searchableFieldSpec) { @@ -104,23 +96,23 @@ private static Map getMappingsForField(@Nonnull final Searchable } else if (fieldType == FieldType.TEXT || fieldType == FieldType.TEXT_PARTIAL || fieldType == FieldType.WORD_GRAM) { mappingForField.putAll(getMappingsForSearchText(fieldType)); } else if (fieldType == FieldType.BROWSE_PATH) { - mappingForField.put(TYPE, TEXT); + mappingForField.put(TYPE, ESUtils.TEXT_FIELD_TYPE); mappingForField.put(FIELDS, ImmutableMap.of(LENGTH, ImmutableMap.of( - TYPE, TOKEN_COUNT, + TYPE, ESUtils.TOKEN_COUNT_FIELD_TYPE, ANALYZER, SLASH_PATTERN_ANALYZER))); mappingForField.put(ANALYZER, BROWSE_PATH_HIERARCHY_ANALYZER); mappingForField.put(FIELDDATA, true); } else if (fieldType == FieldType.BROWSE_PATH_V2) { - mappingForField.put(TYPE, TEXT); + mappingForField.put(TYPE, ESUtils.TEXT_FIELD_TYPE); mappingForField.put(FIELDS, ImmutableMap.of(LENGTH, ImmutableMap.of( - TYPE, TOKEN_COUNT, + TYPE, ESUtils.TOKEN_COUNT_FIELD_TYPE, ANALYZER, UNIT_SEPARATOR_PATTERN_ANALYZER))); mappingForField.put(ANALYZER, BROWSE_PATH_V2_HIERARCHY_ANALYZER); mappingForField.put(FIELDDATA, true); } else if (fieldType == FieldType.URN || fieldType == FieldType.URN_PARTIAL) { - mappingForField.put(TYPE, TEXT); + mappingForField.put(TYPE, ESUtils.TEXT_FIELD_TYPE); mappingForField.put(ANALYZER, URN_ANALYZER); mappingForField.put(SEARCH_ANALYZER, URN_SEARCH_ANALYZER); mappingForField.put(SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER); @@ -135,13 +127,13 @@ private static Map getMappingsForField(@Nonnull final Searchable subFields.put(KEYWORD, KEYWORD_TYPE_MAP); mappingForField.put(FIELDS, subFields); } else if (fieldType == FieldType.BOOLEAN) { - mappingForField.put(TYPE, BOOLEAN); + mappingForField.put(TYPE, ESUtils.BOOLEAN_FIELD_TYPE); } else if (fieldType == FieldType.COUNT) { - mappingForField.put(TYPE, LONG); + mappingForField.put(TYPE, ESUtils.LONG_FIELD_TYPE); } else if (fieldType == FieldType.DATETIME) { - mappingForField.put(TYPE, DATE); + mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE); } else if (fieldType == FieldType.OBJECT) { - mappingForField.put(TYPE, OBJECT); + mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE); } else { log.info("FieldType {} has no mappings implemented", fieldType); } @@ -149,10 +141,10 @@ private static Map getMappingsForField(@Nonnull final Searchable searchableFieldSpec.getSearchableAnnotation() .getHasValuesFieldName() - .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, BOOLEAN))); + .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, ESUtils.BOOLEAN_FIELD_TYPE))); searchableFieldSpec.getSearchableAnnotation() .getNumValuesFieldName() - .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, LONG))); + .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, ESUtils.LONG_FIELD_TYPE))); mappings.putAll(getMappingsForFieldNameAliases(searchableFieldSpec)); return mappings; @@ -160,7 +152,7 @@ private static Map getMappingsForField(@Nonnull final Searchable private static Map 
getMappingsForKeyword() { Map mappingForField = new HashMap<>(); - mappingForField.put(TYPE, KEYWORD); + mappingForField.put(TYPE, ESUtils.KEYWORD_FIELD_TYPE); mappingForField.put(NORMALIZER, KEYWORD_NORMALIZER); // Add keyword subfield without lowercase filter mappingForField.put(FIELDS, ImmutableMap.of(KEYWORD, KEYWORD_TYPE_MAP)); @@ -169,7 +161,7 @@ private static Map getMappingsForKeyword() { private static Map getMappingsForSearchText(FieldType fieldType) { Map mappingForField = new HashMap<>(); - mappingForField.put(TYPE, KEYWORD); + mappingForField.put(TYPE, ESUtils.KEYWORD_FIELD_TYPE); mappingForField.put(NORMALIZER, KEYWORD_NORMALIZER); Map subFields = new HashMap<>(); if (fieldType == FieldType.TEXT_PARTIAL || fieldType == FieldType.WORD_GRAM) { @@ -186,14 +178,14 @@ private static Map getMappingsForSearchText(FieldType fieldType) String fieldName = entry.getKey(); String analyzerName = entry.getValue(); subFields.put(fieldName, ImmutableMap.of( - TYPE, TEXT, + TYPE, ESUtils.TEXT_FIELD_TYPE, ANALYZER, analyzerName )); } } } subFields.put(DELIMITED, ImmutableMap.of( - TYPE, TEXT, + TYPE, ESUtils.TEXT_FIELD_TYPE, ANALYZER, TEXT_ANALYZER, SEARCH_ANALYZER, TEXT_SEARCH_ANALYZER, SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER)); @@ -206,7 +198,7 @@ private static Map getMappingsForSearchText(FieldType fieldType) private static Map getMappingsForSearchScoreField( @Nonnull final SearchScoreFieldSpec searchScoreFieldSpec) { return ImmutableMap.of(searchScoreFieldSpec.getSearchScoreAnnotation().getFieldName(), - ImmutableMap.of(TYPE, DOUBLE)); + ImmutableMap.of(TYPE, ESUtils.DOUBLE_FIELD_TYPE)); } private static Map getMappingsForFieldNameAliases(@Nonnull final SearchableFieldSpec searchableFieldSpec) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 5fcc10b7af5cf..c06907e800d5e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -202,7 +202,7 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi if (!finalSearchFlags.isSkipHighlighting()) { searchSourceBuilder.highlighter(_highlights); } - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); if (finalSearchFlags.isGetSuggestions()) { ESUtils.buildNameSuggestions(searchSourceBuilder, input); @@ -243,7 +243,7 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input, finalSearchFlags.isFulltext())).filter(filterQuery)); _aggregationQueryBuilder.getAggregations().forEach(searchSourceBuilder::aggregation); searchSourceBuilder.highlighter(getHighlights()); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); log.debug("Search request is: " + searchRequest); searchRequest.indicesOptions(null); @@ -270,7 +270,7 @@ public SearchRequest getFilterRequest(@Nullable Filter filters, @Nullable SortCr final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(filterQuery); 
searchSourceBuilder.from(from).size(size); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); return searchRequest; @@ -301,7 +301,7 @@ public SearchRequest getFilterRequest(@Nullable Filter filters, @Nullable SortCr searchSourceBuilder.size(size); ESUtils.setSearchAfter(searchSourceBuilder, sort, pitId, keepAlive); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); return searchRequest; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 9a7d9a1b4c420..53765acb8e29e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -2,6 +2,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.SearchableFieldSpec; +import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.Criterion; @@ -49,7 +52,28 @@ public class ESUtils { public static final int MAX_RESULT_SIZE = 10000; public static final String OPAQUE_ID_HEADER = "X-Opaque-Id"; public static final String HEADER_VALUE_DELIMITER = "|"; - public static final String KEYWORD_TYPE = "keyword"; + + // Field types + public static final String KEYWORD_FIELD_TYPE = "keyword"; + public static final String BOOLEAN_FIELD_TYPE = "boolean"; + public static final String DATE_FIELD_TYPE = "date"; + public static final String DOUBLE_FIELD_TYPE = "double"; + public static final String LONG_FIELD_TYPE = "long"; + public static final String OBJECT_FIELD_TYPE = "object"; + public static final String TEXT_FIELD_TYPE = "text"; + public static final String TOKEN_COUNT_FIELD_TYPE = "token_count"; + // End of field types + + public static final Set FIELD_TYPES_STORED_AS_KEYWORD = Set.of( + SearchableAnnotation.FieldType.KEYWORD, + SearchableAnnotation.FieldType.TEXT, + SearchableAnnotation.FieldType.TEXT_PARTIAL, + SearchableAnnotation.FieldType.WORD_GRAM); + public static final Set FIELD_TYPES_STORED_AS_TEXT = Set.of( + SearchableAnnotation.FieldType.BROWSE_PATH, + SearchableAnnotation.FieldType.BROWSE_PATH_V2, + SearchableAnnotation.FieldType.URN, + SearchableAnnotation.FieldType.URN_PARTIAL); public static final String ENTITY_NAME_FIELD = "_entityName"; public static final String NAME_SUGGESTION = "nameSuggestion"; @@ -174,6 +198,25 @@ public static QueryBuilder getQueryBuilderFromCriterion(@Nonnull final Criterion return getQueryBuilderFromCriterionForSingleField(criterion, isTimeseries); } + public static String getElasticTypeForFieldType(SearchableAnnotation.FieldType fieldType) { + if (FIELD_TYPES_STORED_AS_KEYWORD.contains(fieldType)) { + return KEYWORD_FIELD_TYPE; + } else if (FIELD_TYPES_STORED_AS_TEXT.contains(fieldType)) { + return TEXT_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.BOOLEAN) { + return BOOLEAN_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.COUNT) { + return LONG_FIELD_TYPE; + } else if (fieldType == 
SearchableAnnotation.FieldType.DATETIME) { + return DATE_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.OBJECT) { + return OBJECT_FIELD_TYPE; + } else { + log.warn("FieldType {} has no mappings implemented", fieldType); + return null; + } + } + /** * Populates source field of search query with the sort order as per the criterion provided. * @@ -189,14 +232,39 @@ public static QueryBuilder getQueryBuilderFromCriterion(@Nonnull final Criterion * @param sortCriterion {@link SortCriterion} to be applied to the search results */ public static void buildSortOrder(@Nonnull SearchSourceBuilder searchSourceBuilder, - @Nullable SortCriterion sortCriterion) { + @Nullable SortCriterion sortCriterion, List entitySpecs) { if (sortCriterion == null) { searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC)); } else { + Optional fieldTypeForDefault = Optional.empty(); + for (EntitySpec entitySpec : entitySpecs) { + List fieldSpecs = entitySpec.getSearchableFieldSpecs(); + for (SearchableFieldSpec fieldSpec : fieldSpecs) { + SearchableAnnotation annotation = fieldSpec.getSearchableAnnotation(); + if (annotation.getFieldName().equals(sortCriterion.getField()) + || annotation.getFieldNameAliases().contains(sortCriterion.getField())) { + fieldTypeForDefault = Optional.of(fieldSpec.getSearchableAnnotation().getFieldType()); + break; + } + } + if (fieldTypeForDefault.isPresent()) { + break; + } + } + if (fieldTypeForDefault.isEmpty()) { + log.warn("Sort criterion field " + sortCriterion.getField() + " was not found in any entity spec to be searched"); + } final SortOrder esSortOrder = (sortCriterion.getOrder() == com.linkedin.metadata.query.filter.SortOrder.ASCENDING) ? SortOrder.ASC : SortOrder.DESC; - searchSourceBuilder.sort(new FieldSortBuilder(sortCriterion.getField()).order(esSortOrder).unmappedType(KEYWORD_TYPE)); + FieldSortBuilder sortBuilder = new FieldSortBuilder(sortCriterion.getField()).order(esSortOrder); + if (fieldTypeForDefault.isPresent()) { + String esFieldtype = getElasticTypeForFieldType(fieldTypeForDefault.get()); + if (esFieldtype != null) { + sortBuilder.unmappedType(esFieldtype); + } + } + searchSourceBuilder.sort(sortBuilder); } if (sortCriterion == null || !sortCriterion.getField().equals(DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD)) { searchSourceBuilder.sort(new FieldSortBuilder(DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD).order(SortOrder.ASC)); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index 1660504810296..69dd5c80bef1d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -22,12 +22,15 @@ import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; import com.linkedin.metadata.search.AggregationMetadata; import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig; +import com.linkedin.metadata.search.utils.ESUtils; 
import com.linkedin.r2.RemoteInvocationException; import org.junit.Assert; import org.opensearch.client.RequestOptions; @@ -36,6 +39,9 @@ import org.opensearch.client.indices.AnalyzeResponse; import org.opensearch.client.indices.GetMappingsRequest; import org.opensearch.client.indices.GetMappingsResponse; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.SortBuilder; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.Test; @@ -54,11 +60,7 @@ import static com.linkedin.metadata.Constants.DATA_JOB_ENTITY_NAME; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchQueryBuilder.STRUCTURED_QUERY_PREFIX; import static com.linkedin.metadata.utils.SearchUtil.AGGREGATION_SEPARATOR_CHAR; -import static io.datahubproject.test.search.SearchTestUtils.autocomplete; -import static io.datahubproject.test.search.SearchTestUtils.scroll; -import static io.datahubproject.test.search.SearchTestUtils.search; -import static io.datahubproject.test.search.SearchTestUtils.searchAcrossEntities; -import static io.datahubproject.test.search.SearchTestUtils.searchStructured; +import static io.datahubproject.test.search.SearchTestUtils.*; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -174,6 +176,48 @@ public void testSearchFieldConfig() throws IOException { } } + @Test + public void testGetSortOrder() { + String dateFieldName = "lastOperationTime"; + List entityNamesToTestSearch = List.of("dataset", "chart", "corpgroup"); + List entitySpecs = entityNamesToTestSearch.stream().map( + name -> getEntityRegistry().getEntitySpec(name)) + .collect(Collectors.toList()); + SearchSourceBuilder builder = new SearchSourceBuilder(); + SortCriterion sortCriterion = new SortCriterion().setOrder(SortOrder.DESCENDING).setField(dateFieldName); + ESUtils.buildSortOrder(builder, sortCriterion, entitySpecs); + List> sorts = builder.sorts(); + assertEquals(sorts.size(), 2); // sort by last modified and then by urn + for (SortBuilder sort : sorts) { + assertTrue(sort instanceof FieldSortBuilder); + FieldSortBuilder fieldSortBuilder = (FieldSortBuilder) sort; + if (fieldSortBuilder.getFieldName().equals(dateFieldName)) { + assertEquals(fieldSortBuilder.order(), org.opensearch.search.sort.SortOrder.DESC); + assertEquals(fieldSortBuilder.unmappedType(), "date"); + } else { + assertEquals(fieldSortBuilder.getFieldName(), "urn"); + } + } + + // Test alias field + String entityNameField = "_entityName"; + SearchSourceBuilder nameBuilder = new SearchSourceBuilder(); + SortCriterion nameCriterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField(entityNameField); + ESUtils.buildSortOrder(nameBuilder, nameCriterion, entitySpecs); + sorts = nameBuilder.sorts(); + assertEquals(sorts.size(), 2); + for (SortBuilder sort : sorts) { + assertTrue(sort instanceof FieldSortBuilder); + FieldSortBuilder fieldSortBuilder = (FieldSortBuilder) sort; + if (fieldSortBuilder.getFieldName().equals(entityNameField)) { + assertEquals(fieldSortBuilder.order(), org.opensearch.search.sort.SortOrder.ASC); + assertEquals(fieldSortBuilder.unmappedType(), "keyword"); + } else { + assertEquals(fieldSortBuilder.getFieldName(), "urn"); + } + } + } + @Test public void testDatasetHasTags() throws IOException { GetMappingsRequest req = new GetMappingsRequest() @@ -1454,6 +1498,16 @@ public void 
testColumnExactMatch() { "Expected table with column name exact match first"); } + @Test + public void testSortOrdering() { + String query = "unit_data"; + SortCriterion criterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField("lastOperationTime"); + SearchResult result = getSearchService().searchAcrossEntities(SEARCHABLE_ENTITIES, query, null, criterion, 0, + 100, new SearchFlags().setFulltext(true).setSkipCache(true), null); + assertTrue(result.getEntities().size() > 2, + String.format("%s - Expected search results to have at least two results", query)); + } + private Stream getTokens(AnalyzeRequest request) throws IOException { return getSearchClient().indices().analyze(request, RequestOptions.DEFAULT).getTokens().stream(); } From 7855fb60a7e96e6d04d8d96f7505f8b4dd62a7c4 Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Wed, 18 Oct 2023 17:19:10 -0300 Subject: [PATCH 70/98] fix(api): Add preceding / to get index sizes path (#9043) Co-authored-by: Indy Prentice --- .../ElasticSearchTimeseriesAspectService.java | 2 +- .../search/TimeseriesAspectServiceTestBase.java | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index a496fc427138e..3e8f83a531b59 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -169,7 +169,7 @@ public List getIndexSizes() { List res = new ArrayList<>(); try { String indicesPattern = _indexConvention.getAllTimeseriesAspectIndicesPattern(); - Response r = _searchClient.getLowLevelClient().performRequest(new Request("GET", indicesPattern + "/_stats")); + Response r = _searchClient.getLowLevelClient().performRequest(new Request("GET", "/" + indicesPattern + "/_stats")); JsonNode body = new ObjectMapper().readTree(r.getEntity().getContent()); body.get("indices").fields().forEachRemaining(entry -> { TimeseriesIndexSizeResult elemResult = new TimeseriesIndexSizeResult(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index cc60ba8679e1f..f9b8f84b10ad2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -45,6 +45,7 @@ import com.linkedin.timeseries.GroupingBucket; import com.linkedin.timeseries.GroupingBucketType; import com.linkedin.timeseries.TimeWindowSize; +import com.linkedin.timeseries.TimeseriesIndexSizeResult; import org.opensearch.client.RestHighLevelClient; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.BeforeClass; @@ -884,4 +885,19 @@ public void testCountByFilterAfterDelete() throws InterruptedException { _elasticSearchTimeseriesAspectService.countByFilter(ENTITY_NAME, ASPECT_NAME, urnAndTimeFilter); assertEquals(count, 0L); } + + @Test(groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"}) + public void testGetIndexSizes() { + List result = _elasticSearchTimeseriesAspectService.getIndexSizes(); + /* 
+ Example result: + {aspectName=testentityprofile, sizeMb=52.234, indexName=es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1, entityName=testentity} + {aspectName=testentityprofile, sizeMb=0.208, indexName=es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1, entityName=testentitywithouttests} + */ + // There may be other indices in there from other tests, so just make sure that index for entity + aspect is in there + assertTrue(result.size() > 1); + assertTrue( + result.stream().anyMatch(idxSizeResult -> idxSizeResult.getIndexName().equals( + "es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1"))); + } } From 409f981fd3e12a1d470a79cb091ac92e1a4a2c46 Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Wed, 18 Oct 2023 18:25:54 -0300 Subject: [PATCH 71/98] fix(search): Apply SearchFlags passed in through to scroll queries (#9041) Co-authored-by: Indy Prentice --- .../client/CachingEntitySearchService.java | 13 ++++++---- .../elasticsearch/ElasticSearchService.java | 13 ++++++---- .../query/request/SearchRequestHandler.java | 4 +++- .../search/LineageServiceTestBase.java | 16 ++++++++++--- .../request/SearchRequestHandlerTest.java | 24 +++++++++++++++++++ .../metadata/search/EntitySearchService.java | 6 +++-- 6 files changed, 60 insertions(+), 16 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java index 13a7d16b723a7..ceaf37a1289d9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java @@ -256,13 +256,13 @@ public ScrollResult getCachedScrollResults( cacheAccess.stop(); if (result == null) { Timer.Context cacheMiss = MetricUtils.timer(this.getClass(), "scroll_cache_miss").time(); - result = getRawScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, isFullText); + result = getRawScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, isFullText, flags); cache.put(cacheKey, toJsonString(result)); cacheMiss.stop(); MetricUtils.counter(this.getClass(), "scroll_cache_miss_count").inc(); } } else { - result = getRawScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, isFullText); + result = getRawScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, isFullText, flags); } return result; } @@ -328,7 +328,8 @@ private ScrollResult getRawScrollResults( @Nullable final String scrollId, @Nullable final String keepAlive, final int count, - final boolean fulltext) { + final boolean fulltext, + @Nullable final SearchFlags searchFlags) { if (fulltext) { return entitySearchService.fullTextScroll( entities, @@ -337,7 +338,8 @@ private ScrollResult getRawScrollResults( sortCriterion, scrollId, keepAlive, - count); + count, + searchFlags); } else { return entitySearchService.structuredScroll(entities, input, @@ -345,7 +347,8 @@ private ScrollResult getRawScrollResults( sortCriterion, scrollId, keepAlive, - count); + count, + searchFlags); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index ef5a555e95ba8..024cf2b0abec2 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -175,23 +175,26 @@ public List getBrowsePaths(@Nonnull String entityName, @Nonnull Urn urn) @Nonnull @Override public ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size) { + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size, @Nullable SearchFlags searchFlags) { log.debug(String.format( "Scrolling Structured Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, scrollId: %s, size: %s", entities, input, postFilters, sortCriterion, scrollId, size)); + SearchFlags flags = Optional.ofNullable(searchFlags).orElse(new SearchFlags()); + flags.setFulltext(true); return esSearchDAO.scroll(entities, input, postFilters, sortCriterion, scrollId, keepAlive, size, - new SearchFlags().setFulltext(true)); + flags); } @Nonnull @Override public ScrollResult structuredScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size) { + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size, @Nullable SearchFlags searchFlags) { log.debug(String.format( "Scrolling FullText Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, scrollId: %s, size: %s", entities, input, postFilters, sortCriterion, scrollId, size)); - return esSearchDAO.scroll(entities, input, postFilters, sortCriterion, scrollId, keepAlive, size, - new SearchFlags().setFulltext(false)); + SearchFlags flags = Optional.ofNullable(searchFlags).orElse(new SearchFlags()); + flags.setFulltext(false); + return esSearchDAO.scroll(entities, input, postFilters, sortCriterion, scrollId, keepAlive, size, flags); } public Optional raw(@Nonnull String indexName, @Nullable String jsonQuery) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index c06907e800d5e..49571a60d5f21 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -242,7 +242,9 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi BoolQueryBuilder filterQuery = getFilterQuery(filter); searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input, finalSearchFlags.isFulltext())).filter(filterQuery)); _aggregationQueryBuilder.getAggregations().forEach(searchSourceBuilder::aggregation); - searchSourceBuilder.highlighter(getHighlights()); + if (!finalSearchFlags.isSkipHighlighting()) { + searchSourceBuilder.highlighter(_highlights); + } ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); log.debug("Search request is: " + searchRequest); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java 
index 461a146022446..696e3b62834bd 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -47,8 +47,10 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import org.junit.Assert; +import org.mockito.ArgumentCaptor; import org.mockito.Mockito; import org.opensearch.client.RestHighLevelClient; +import org.opensearch.action.search.SearchRequest; import org.springframework.cache.CacheManager; import org.springframework.cache.concurrent.ConcurrentMapCacheManager; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; @@ -108,6 +110,7 @@ abstract public class LineageServiceTestBase extends AbstractTestNGSpringContext private GraphService _graphService; private CacheManager _cacheManager; private LineageSearchService _lineageSearchService; + private RestHighLevelClient _searchClientSpy; private static final String ENTITY_NAME = "testEntity"; private static final Urn TEST_URN = TestEntityUtil.getTestEntityUrn(); @@ -162,10 +165,11 @@ private ElasticSearchService buildEntitySearchService() { EntityIndexBuilders indexBuilders = new EntityIndexBuilders(getIndexBuilder(), _entityRegistry, _indexConvention, _settingsBuilder); - ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, getSearchClient(), _indexConvention, false, + _searchClientSpy = spy(getSearchClient()); + ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClientSpy, _indexConvention, false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), null); - ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, getSearchClient(), _indexConvention, getSearchConfiguration(), getCustomSearchConfiguration()); - ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, getSearchClient(), _indexConvention, getBulkProcessor(), 1); + ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, _searchClientSpy, _indexConvention, getSearchConfiguration(), getCustomSearchConfiguration()); + ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, _searchClientSpy, _indexConvention, getBulkProcessor(), 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } @@ -246,9 +250,15 @@ public void testSearchService() throws Exception { _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); syncAfterWrite(getBulkProcessor()); + Mockito.reset(_searchClientSpy); searchResult = searchAcrossLineage(null, TEST1); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn); + // Verify that highlighting was turned off in the query + ArgumentCaptor searchRequestCaptor = ArgumentCaptor.forClass(SearchRequest.class); + Mockito.verify(_searchClientSpy, times(1)).search(searchRequestCaptor.capture(), any()); + SearchRequest capturedRequest = searchRequestCaptor.getValue(); + assertNull(capturedRequest.source().highlighter()); clearCache(false); when(_graphService.getLineage(eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), anyInt(), diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 90c6c523c588f..0ea035a10f91d 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -97,6 +97,30 @@ public void testDatasetFieldsAndHighlights() { ), "unexpected lineage fields in highlights: " + highlightFields); } + @Test + public void testSearchRequestHandlerHighlightingTurnedOff() { + SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequest searchRequest = requestHandler.getSearchRequest("testQuery", null, null, 0, + 10, new SearchFlags().setFulltext(false).setSkipHighlighting(true), null); + SearchSourceBuilder sourceBuilder = searchRequest.source(); + assertEquals(sourceBuilder.from(), 0); + assertEquals(sourceBuilder.size(), 10); + // Filters + Collection aggBuilders = sourceBuilder.aggregations().getAggregatorFactories(); + // Expect 2 aggregations: textFieldOverride and _index + assertEquals(aggBuilders.size(), 2); + for (AggregationBuilder aggBuilder : aggBuilders) { + if (aggBuilder.getName().equals("textFieldOverride")) { + TermsAggregationBuilder filterPanelBuilder = (TermsAggregationBuilder) aggBuilder; + assertEquals(filterPanelBuilder.field(), "textFieldOverride.keyword"); + } else if (!aggBuilder.getName().equals("_entityType")) { + fail("Found unexepected aggregation: " + aggBuilder.getName()); + } + } + // Highlights should not be present + assertNull(sourceBuilder.highlighter()); + } + @Test public void testSearchRequestHandler() { SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java index a46b58aabfb0b..64f59780b887f 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java @@ -188,11 +188,12 @@ BrowseResult browse(@Nonnull String entityName, @Nonnull String path, @Nullable * @param sortCriterion {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll identifier to pass to search service * @param size the number of search hits to return + * @param searchFlags flags controlling search options * @return a {@link ScrollResult} that contains a list of matched documents and related search result metadata */ @Nonnull ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size); + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size, @Nullable SearchFlags searchFlags); /** * Gets a list of documents that match given search request. 
The results are aggregated and filters are applied to the @@ -204,11 +205,12 @@ ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String inpu * @param sortCriterion {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll identifier to pass to search service * @param size the number of search hits to return + * @param searchFlags flags controlling search options * @return a {@link ScrollResult} that contains a list of matched documents and related search result metadata */ @Nonnull ScrollResult structuredScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size); + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size, @Nullable SearchFlags searchFlags); /** * Max result size returned by the underlying search backend From 269c4eac7ef09d73224050e432bfbf60727e4d65 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Thu, 19 Oct 2023 01:43:05 +0100 Subject: [PATCH 72/98] fix(ownership): Corrects validation of ownership type and makes it consistent across graphQL calls (#9044) Co-authored-by: Ellie O'Neil --- .../resolvers/mutate/AddOwnerResolver.java | 27 ++- .../resolvers/mutate/AddOwnersResolver.java | 2 +- .../mutate/BatchAddOwnersResolver.java | 3 +- .../resolvers/mutate/util/OwnerUtils.java | 65 +++----- .../owner/AddOwnersResolverTest.java | 157 ++++++++++++++++-- 5 files changed, 183 insertions(+), 71 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java index 5ca7007d98e43..3f2dab0a5ba71 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java @@ -2,14 +2,11 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.CorpuserUrn; - import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AddOwnerInput; -import com.linkedin.datahub.graphql.generated.OwnerEntityType; import com.linkedin.datahub.graphql.generated.OwnerInput; -import com.linkedin.datahub.graphql.generated.OwnershipType; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.metadata.entity.EntityService; @@ -20,7 +17,6 @@ import lombok.extern.slf4j.Slf4j; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; -import static com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils.*; @Slf4j @@ -32,30 +28,33 @@ public class AddOwnerResolver implements DataFetcher> @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { final AddOwnerInput input = bindArgument(environment.getArgument("input"), AddOwnerInput.class); - Urn ownerUrn = Urn.createFromString(input.getOwnerUrn()); - OwnerEntityType ownerEntityType = input.getOwnerEntityType(); - OwnershipType type = input.getType() == null ? OwnershipType.NONE : input.getType(); - String ownershipUrn = input.getOwnershipTypeUrn() == null ? 
mapOwnershipTypeToEntity(type.name()) : input.getOwnershipTypeUrn(); Urn targetUrn = Urn.createFromString(input.getResourceUrn()); + OwnerInput.Builder ownerInputBuilder = OwnerInput.builder(); + ownerInputBuilder.setOwnerUrn(input.getOwnerUrn()); + ownerInputBuilder.setOwnerEntityType(input.getOwnerEntityType()); + if (input.getType() != null) { + ownerInputBuilder.setType(input.getType()); + } + if (input.getOwnershipTypeUrn() != null) { + ownerInputBuilder.setOwnershipTypeUrn(input.getOwnershipTypeUrn()); + } + OwnerInput ownerInput = ownerInputBuilder.build(); if (!OwnerUtils.isAuthorizedToUpdateOwners(environment.getContext(), targetUrn)) { throw new AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator."); } return CompletableFuture.supplyAsync(() -> { - OwnerUtils.validateAddInput( - ownerUrn, input.getOwnershipTypeUrn(), ownerEntityType, - targetUrn, - _entityService - ); + OwnerUtils.validateAddOwnerInput(ownerInput, ownerUrn, _entityService); + try { log.debug("Adding Owner. input: {}", input); Urn actor = CorpuserUrn.createFromString(((QueryContext) environment.getContext()).getActorUrn()); OwnerUtils.addOwnersToResources( - ImmutableList.of(new OwnerInput(input.getOwnerUrn(), ownerEntityType, type, ownershipUrn)), + ImmutableList.of(ownerInput), ImmutableList.of(new ResourceRefInput(input.getResourceUrn(), null, null)), actor, _entityService diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java index 06424efa83819..4e5b5bdb2a651 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java @@ -39,7 +39,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new AuthorizationException("Unauthorized to perform this action. 
Please contact your DataHub administrator."); } - OwnerUtils.validateAddInput( + OwnerUtils.validateAddOwnerInput( owners, targetUrn, _entityService diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java index 019c044d81ab3..5beaeecae673f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java @@ -53,8 +53,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw private void validateOwners(List owners) { for (OwnerInput ownerInput : owners) { - OwnerUtils.validateOwner(UrnUtils.getUrn(ownerInput.getOwnerUrn()), ownerInput.getOwnerEntityType(), - UrnUtils.getUrn(ownerInput.getOwnershipTypeUrn()), _entityService); + OwnerUtils.validateOwner(ownerInput, _entityService); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java index d2f7f896e5953..7233995804423 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -50,7 +50,7 @@ public static void addOwnersToResources( ) { final List changes = new ArrayList<>(); for (ResourceRefInput resource : resources) { - changes.add(buildAddOwnersProposal(owners, UrnUtils.getUrn(resource.getResourceUrn()), actor, entityService)); + changes.add(buildAddOwnersProposal(owners, UrnUtils.getUrn(resource.getResourceUrn()), entityService)); } EntityUtils.ingestChangeProposals(changes, entityService, actor, false); } @@ -69,7 +69,7 @@ public static void removeOwnersFromResources( } - private static MetadataChangeProposal buildAddOwnersProposal(List owners, Urn resourceUrn, Urn actor, EntityService entityService) { + static MetadataChangeProposal buildAddOwnersProposal(List owners, Urn resourceUrn, EntityService entityService) { Ownership ownershipAspect = (Ownership) EntityUtils.getAspectFromEntity( resourceUrn.toString(), Constants.OWNERSHIP_ASPECT_NAME, entityService, @@ -181,18 +181,13 @@ public static boolean isAuthorizedToUpdateOwners(@Nonnull QueryContext context, orPrivilegeGroups); } - public static Boolean validateAddInput( + public static Boolean validateAddOwnerInput( List owners, Urn resourceUrn, EntityService entityService ) { for (OwnerInput owner : owners) { - boolean result = validateAddInput( - UrnUtils.getUrn(owner.getOwnerUrn()), - owner.getOwnershipTypeUrn(), - owner.getOwnerEntityType(), - resourceUrn, - entityService); + boolean result = validateAddOwnerInput(owner, resourceUrn, entityService); if (!result) { return false; } @@ -200,44 +195,29 @@ public static Boolean validateAddInput( return true; } - public static Boolean validateAddInput( - Urn ownerUrn, - String ownershipEntityUrn, - OwnerEntityType ownerEntityType, + public static Boolean validateAddOwnerInput( + OwnerInput owner, Urn resourceUrn, EntityService entityService ) { - if (OwnerEntityType.CORP_GROUP.equals(ownerEntityType) && !Constants.CORP_GROUP_ENTITY_NAME.equals(ownerUrn.getEntityType())) { - throw new IllegalArgumentException(String.format("Failed to change ownership 
for resource %s. Expected a corp group urn.", resourceUrn)); - } - - if (OwnerEntityType.CORP_USER.equals(ownerEntityType) && !Constants.CORP_USER_ENTITY_NAME.equals(ownerUrn.getEntityType())) { - throw new IllegalArgumentException(String.format("Failed to change ownership for resource %s. Expected a corp user urn.", resourceUrn)); - } - if (!entityService.exists(resourceUrn)) { throw new IllegalArgumentException(String.format("Failed to change ownership for resource %s. Resource does not exist.", resourceUrn)); } - if (!entityService.exists(ownerUrn)) { - throw new IllegalArgumentException(String.format("Failed to change ownership for resource %s. Owner %s does not exist.", resourceUrn, ownerUrn)); - } - - if (ownershipEntityUrn != null && !entityService.exists(UrnUtils.getUrn(ownershipEntityUrn))) { - throw new IllegalArgumentException(String.format("Failed to change ownership type for resource %s. Ownership Type " - + "%s does not exist.", resourceUrn, ownershipEntityUrn)); - } + validateOwner(owner, entityService); return true; } public static void validateOwner( - Urn ownerUrn, - OwnerEntityType ownerEntityType, - Urn ownershipEntityUrn, + OwnerInput owner, EntityService entityService ) { + + OwnerEntityType ownerEntityType = owner.getOwnerEntityType(); + Urn ownerUrn = UrnUtils.getUrn(owner.getOwnerUrn()); + if (OwnerEntityType.CORP_GROUP.equals(ownerEntityType) && !Constants.CORP_GROUP_ENTITY_NAME.equals(ownerUrn.getEntityType())) { throw new IllegalArgumentException( String.format("Failed to change ownership for resource(s). Expected a corp group urn, found %s", ownerUrn)); @@ -252,9 +232,14 @@ public static void validateOwner( throw new IllegalArgumentException(String.format("Failed to change ownership for resource(s). Owner with urn %s does not exist.", ownerUrn)); } - if (!entityService.exists(ownershipEntityUrn)) { - throw new IllegalArgumentException(String.format("Failed to change ownership for resource(s). Ownership type with " - + "urn %s does not exist.", ownershipEntityUrn)); + if (owner.getOwnershipTypeUrn() != null && !entityService.exists(UrnUtils.getUrn(owner.getOwnershipTypeUrn()))) { + throw new IllegalArgumentException(String.format("Failed to change ownership for resource(s). Custom Ownership type with " + + "urn %s does not exist.", owner.getOwnershipTypeUrn())); + } + + if (owner.getType() == null && owner.getOwnershipTypeUrn() == null) { + throw new IllegalArgumentException("Failed to change ownership for resource(s). 
Expected either " + + "type or ownershipTypeUrn to be specified."); } } @@ -269,11 +254,11 @@ public static Boolean validateRemoveInput( } public static void addCreatorAsOwner( - QueryContext context, - String urn, - OwnerEntityType ownerEntityType, - OwnershipType ownershipType, - EntityService entityService) { + QueryContext context, + String urn, + OwnerEntityType ownerEntityType, + OwnershipType ownershipType, + EntityService entityService) { try { Urn actorUrn = CorpuserUrn.createFromString(context.getActorUrn()); String ownershipTypeUrn = mapOwnershipTypeToEntity(ownershipType.name()); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java index efc0c5dfcf36d..329d71ec125db 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java @@ -2,6 +2,11 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; +import com.linkedin.common.Owner; +import com.linkedin.common.OwnerArray; +import com.linkedin.common.Ownership; +import com.linkedin.common.OwnershipSource; +import com.linkedin.common.OwnershipSourceType; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; @@ -28,6 +33,7 @@ public class AddOwnersResolverTest { private static final String TEST_ENTITY_URN = "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; private static final String TEST_OWNER_1_URN = "urn:li:corpuser:test-id-1"; private static final String TEST_OWNER_2_URN = "urn:li:corpuser:test-id-2"; + private static final String TEST_OWNER_3_URN = "urn:li:corpGroup:test-id-3"; @Test public void testGetSuccessNoExistingOwners() throws Exception { @@ -75,33 +81,41 @@ public void testGetSuccessNoExistingOwners() throws Exception { } @Test - public void testGetSuccessExistingOwners() throws Exception { + public void testGetSuccessExistingOwnerNewType() throws Exception { EntityService mockService = getMockEntityService(); + com.linkedin.common.Ownership oldOwnership = new Ownership().setOwners(new OwnerArray( + ImmutableList.of(new Owner() + .setOwner(UrnUtils.getUrn(TEST_OWNER_1_URN)) + .setType(com.linkedin.common.OwnershipType.NONE) + .setSource(new OwnershipSource().setType(OwnershipSourceType.MANUAL)) + ))); + Mockito.when(mockService.getAspect( - Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN)), - Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), - Mockito.eq(0L))) - .thenReturn(null); + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(oldOwnership); Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN))).thenReturn(true); Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_1_URN))).thenReturn(true); - Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_2_URN))).thenReturn(true); Mockito.when(mockService.exists(Urn.createFromString( - OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.TECHNICAL_OWNER.name())))) - .thenReturn(true); + OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.TECHNICAL_OWNER.name())))) + .thenReturn(true); AddOwnersResolver resolver = new 
AddOwnersResolver(mockService); // Execute resolver QueryContext mockContext = getMockAllowContext(); DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + AddOwnersInput input = new AddOwnersInput(ImmutableList.of( - new OwnerInput(TEST_OWNER_1_URN, OwnerEntityType.CORP_USER, OwnershipType.TECHNICAL_OWNER, - OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())), - new OwnerInput(TEST_OWNER_2_URN, OwnerEntityType.CORP_USER, OwnershipType.TECHNICAL_OWNER, - OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_1_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_USER) + .build() ), TEST_ENTITY_URN); Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); @@ -111,11 +125,126 @@ public void testGetSuccessExistingOwners() throws Exception { verifyIngestProposal(mockService, 1); Mockito.verify(mockService, Mockito.times(1)).exists( - Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) + Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) ); + } + + @Test + public void testGetSuccessDeprecatedTypeToOwnershipType() throws Exception { + EntityService mockService = getMockEntityService(); + + com.linkedin.common.Ownership oldOwnership = new Ownership().setOwners(new OwnerArray( + ImmutableList.of(new Owner() + .setOwner(UrnUtils.getUrn(TEST_OWNER_1_URN)) + .setType(com.linkedin.common.OwnershipType.TECHNICAL_OWNER) + .setSource(new OwnershipSource().setType(OwnershipSourceType.MANUAL)) + ))); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(oldOwnership); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_1_URN))).thenReturn(true); + + Mockito.when(mockService.exists(Urn.createFromString( + OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.TECHNICAL_OWNER.name())))) + .thenReturn(true); + + AddOwnersResolver resolver = new AddOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + + AddOwnersInput input = new AddOwnersInput(ImmutableList.of(OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_1_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_USER) + .build() + ), TEST_ENTITY_URN); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + assertTrue(resolver.get(mockEnv).get()); + + // Unable to easily validate exact payload due to the injected timestamp + verifyIngestProposal(mockService, 1); Mockito.verify(mockService, Mockito.times(1)).exists( - Mockito.eq(Urn.createFromString(TEST_OWNER_2_URN)) + Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) + ); + } + + @Test + public void testGetSuccessMultipleOwnerTypes() throws Exception { + EntityService mockService = getMockEntityService(); + + com.linkedin.common.Ownership oldOwnership = new Ownership().setOwners(new OwnerArray( + ImmutableList.of(new Owner() + .setOwner(UrnUtils.getUrn(TEST_OWNER_1_URN)) 
+ .setType(com.linkedin.common.OwnershipType.NONE) + .setSource(new OwnershipSource().setType(OwnershipSourceType.MANUAL)) + ))); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(oldOwnership); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_1_URN))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_2_URN))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_3_URN))).thenReturn(true); + + Mockito.when(mockService.exists(Urn.createFromString( + OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.TECHNICAL_OWNER.name())))) + .thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString( + OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER.name())))) + .thenReturn(true); + + AddOwnersResolver resolver = new AddOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + + AddOwnersInput input = new AddOwnersInput(ImmutableList.of(OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_1_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_USER) + .build(), + OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_2_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.BUSINESS_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_USER) + .build(), + OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_3_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_GROUP) + .build() + ), TEST_ENTITY_URN); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + assertTrue(resolver.get(mockEnv).get()); + + // Unable to easily validate exact payload due to the injected timestamp + verifyIngestProposal(mockService, 1); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) + ); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_2_URN)) + ); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_3_URN)) ); } From 75b36c41ee4fd74891b1bfe37885b4cd840e2906 Mon Sep 17 00:00:00 2001 From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com> Date: Thu, 19 Oct 2023 08:32:24 -0700 Subject: [PATCH 73/98] docs(protobuf) Update messaging around nesting messages (#9048) --- metadata-integration/java/datahub-protobuf/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-integration/java/datahub-protobuf/README.md b/metadata-integration/java/datahub-protobuf/README.md index daea8d438679c..29b82aa3e68f5 100644 --- a/metadata-integration/java/datahub-protobuf/README.md +++ b/metadata-integration/java/datahub-protobuf/README.md @@ -1,6 +1,6 @@ # Protobuf Schemas -The `datahub-protobuf` module is designed to be used with the Java Emitter, the input is a compiled protobuf binary `*.protoc` files and optionally the corresponding `*.proto` source code. 
In addition, you can supply the root message in cases where a single protobuf source file includes multiple non-nested messages. +The `datahub-protobuf` module is designed to be used with the Java Emitter, the input is a compiled protobuf binary `*.protoc` files and optionally the corresponding `*.proto` source code. You can supply a file with multiple nested messages to be processed. If you have a file with multiple non-nested messages, you will need to separate them out into different files or supply the root message, as otherwise we will only process the first one. ## Supported Features From b1abd38a6b4aef3da0c50ecd23612cae7e3c5d28 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Thu, 19 Oct 2023 15:33:54 -0400 Subject: [PATCH 74/98] refactor(): Use data-testids for glossary_navigation and dataset_ownership tests (#9033) --- .../CreateGlossaryEntityModal.tsx | 7 ++- .../shared/EntityDropdown/EntityDropdown.tsx | 3 +- .../MoveGlossaryEntityModal.tsx | 5 +- .../Ownership/sidebar/SidebarOwnerSection.tsx | 6 +- .../src/app/glossary/BusinessGlossaryPage.tsx | 4 +- .../src/app/glossary/GlossarySidebar.tsx | 2 +- .../e2e/glossary/glossary_navigation.js | 55 +++++++++++-------- .../cypress/e2e/lineage/lineage_graph.js | 2 - .../e2e/mutations/dataset_ownership.js | 2 +- .../tests/cypress/cypress/support/commands.js | 1 + 10 files changed, 54 insertions(+), 33 deletions(-) diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/CreateGlossaryEntityModal.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/CreateGlossaryEntityModal.tsx index d48ead2f5863e..9788d36af2c65 100644 --- a/datahub-web-react/src/app/entity/shared/EntityDropdown/CreateGlossaryEntityModal.tsx +++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/CreateGlossaryEntityModal.tsx @@ -112,7 +112,11 @@ function CreateGlossaryEntityModal(props: Props) { - @@ -130,6 +134,7 @@ function CreateGlossaryEntityModal(props: Props) { > Name}> setIsMoveModalVisible(true)} > - +  Move diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveGlossaryEntityModal.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveGlossaryEntityModal.tsx index 5352825708776..37a625f58100b 100644 --- a/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveGlossaryEntityModal.tsx +++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveGlossaryEntityModal.tsx @@ -64,6 +64,7 @@ function MoveGlossaryEntityModal(props: Props) { return ( Cancel - + } > diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection.tsx index 57743d0531afe..aa9a337d4ba44 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection.tsx @@ -70,7 +70,11 @@ export const SidebarOwnerSection = ({ properties, readOnly }: Props) => { )} {!readOnly && ( - )} diff --git a/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx b/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx index 11f54cb5078e6..a5262265fd23d 100644 --- a/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx +++ b/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx @@ -92,11 +92,12 @@ function BusinessGlossaryPage() { {(termsError || nodesError) && ( )} - + 
Business Glossary
)} diff --git a/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx b/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx index dba9b25e14e99..7a14b6a794189 100644 --- a/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx @@ -167,7 +167,11 @@ export const CreateScheduleStep = ({ state, updateState, goTo, prev }: StepProps
-
diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx index 913f8253ece5a..992ebff643c31 100644 --- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx @@ -123,6 +123,7 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps)
0)} onClick={() => onClickCreate(false)} > diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx index 4ddeb7b492595..bee9b04cee100 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx @@ -86,10 +86,20 @@ function RecipeBuilder(props: Props) { {sourceConfigs?.displayName} Recipe - switchViews(true)}> + switchViews(true)} + data-testid="recipe-builder-form-button" + > Form - switchViews(false)}> + switchViews(false)} + data-testid="recipe-builder-yaml-button" + > YAML @@ -114,7 +124,9 @@ function RecipeBuilder(props: Props) { - + )} diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js new file mode 100644 index 0000000000000..6c5dd77810644 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js @@ -0,0 +1,68 @@ + +const number = Math.floor(Math.random() * 100000); +const accound_id = `account${number}`; +const warehouse_id = `warehouse${number}`; +const username = `user${number}`; +const password = `password${number}`; +const role = `role${number}`; +const ingestion_source_name = `ingestion source ${number}`; + +describe("ingestion source creation flow", () => { + it("create a ingestion source using ui, verify ingestion source details saved correctly, remove ingestion source", () => { + // Go to ingestion page, create a snowflake source + cy.loginWithCredentials(); + cy.goToIngestionPage(); + cy.clickOptionWithTestId("create-ingestion-source-button"); + cy.clickOptionWithText("Snowflake"); + cy.waitTextVisible("Snowflake Recipe"); + cy.get("#account_id").type(accound_id); + cy.get("#warehouse").type(warehouse_id); + cy.get("#username").type(username); + cy.get("#password").type(password); + cy.focused().blur(); + cy.get("#role").type(role); + + // Verify yaml recipe is generated correctly + cy.clickOptionWithTestId("recipe-builder-yaml-button"); + cy.waitTextVisible("account_id"); + cy.waitTextVisible(accound_id); + cy.waitTextVisible(warehouse_id); + cy.waitTextVisible(username); + cy.waitTextVisible(password); + cy.waitTextVisible(role); + + // Finish creating source + cy.clickOptionWithTestId("recipe-builder-next-button"); + cy.waitTextVisible("Configure an Ingestion Schedule"); + cy.clickOptionWithTestId("ingestion-schedule-next-button"); + cy.waitTextVisible("Give this ingestion source a name."); + cy.get('[data-testid="source-name-input"]').type(ingestion_source_name); + cy.clickOptionWithTestId("ingestion-source-save-button"); + cy.waitTextVisible("Successfully created ingestion source!").wait(5000) + cy.waitTextVisible(ingestion_source_name); + cy.get('[data-testid="ingestion-source-table-status"]').contains("Pending...").should("be.visible"); + + // Verify ingestion source details are saved correctly + cy.get('[data-testid="ingestion-source-table-edit-button"]').first().click(); + cy.waitTextVisible("Edit Ingestion Source"); + cy.get("#account_id").should("have.value", accound_id); + cy.get("#warehouse").should("have.value", warehouse_id); + cy.get("#username").should("have.value", username); + cy.get("#password").should("have.value", password); + cy.get("#role").should("have.value", role); + cy.get("button").contains("Next").click(); + cy.waitTextVisible("Configure an Ingestion Schedule"); + cy.clickOptionWithTestId("ingestion-schedule-next-button"); + 
cy.get('[data-testid="source-name-input"]').clear().type(ingestion_source_name + " EDITED"); + cy.clickOptionWithTestId("ingestion-source-save-button"); + cy.waitTextVisible("Successfully updated ingestion source!"); + cy.waitTextVisible(ingestion_source_name + " EDITED"); + + // Remove ingestion source + cy.get('[data-testid="delete-button"]').first().click(); + cy.waitTextVisible("Confirm Ingestion Source Removal"); + cy.get("button").contains("Yes").click(); + cy.waitTextVisible("Removed ingestion source."); + cy.ensureTextNotPresent(ingestion_source_name + " EDITED") + }) +}); \ No newline at end of file From 2fea466d48c856f5c469af6f611990a200e5bece Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 20 Oct 2023 13:47:52 -0700 Subject: [PATCH 77/98] docs: fix lineage capability annotations (#8954) --- .../src/datahub/ingestion/source/aws/glue.py | 1 + .../datahub/ingestion/source/bigquery_v2/bigquery.py | 1 + .../src/datahub/ingestion/source/kafka_connect.py | 1 + .../datahub/ingestion/source/looker/looker_source.py | 6 +++++- .../src/datahub/ingestion/source/metabase.py | 1 + .../src/datahub/ingestion/source/metadata/lineage.py | 10 +++++++++- .../src/datahub/ingestion/source/mode.py | 1 + .../src/datahub/ingestion/source/nifi.py | 4 +++- .../src/datahub/ingestion/source/powerbi/powerbi.py | 5 ++++- .../src/datahub/ingestion/source/sql_queries.py | 10 +++++++++- .../src/datahub/ingestion/source/superset.py | 1 + .../src/datahub/ingestion/source/tableau.py | 6 +++++- 12 files changed, 41 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index e5dff786b71d1..aa7e5aa352a3e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -221,6 +221,7 @@ def report_table_dropped(self, table: str) -> None: SourceCapability.DELETION_DETECTION, "Enabled by default when stateful ingestion is turned on.", ) +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") class GlueSource(StatefulIngestionSourceBase): """ Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../../../../docs/generated/ingestion/sources/s3.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 552612f877b9a..692d8c4f81bb6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -153,6 +153,7 @@ def cleanup(config: BigQueryV2Config) -> None: ) @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") @capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration") +@capability(SourceCapability.LINEAGE_FINE, "Optionally enabled via configuration") @capability( SourceCapability.USAGE_STATS, "Enabled by default, can be disabled via configuration `include_usage_statistics`", diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py index 5fae0ee5215a3..1a1e012e80633 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py @@ -1096,6 +1096,7 @@ def transform_connector_config( @config_class(KafkaConnectSourceConfig) @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") class KafkaConnectSource(StatefulIngestionSourceBase): config: KafkaConnectSourceConfig report: KafkaConnectSourceReport diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 8297a0aa8efa7..a3df977582ca4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -103,6 +103,11 @@ @capability( SourceCapability.OWNERSHIP, "Enabled by default, configured using `extract_owners`" ) +@capability(SourceCapability.LINEAGE_COARSE, "Supported by default") +@capability( + SourceCapability.LINEAGE_FINE, + "Enabled by default, configured using `extract_column_level_lineage`", +) @capability( SourceCapability.USAGE_STATS, "Enabled by default, configured using `extract_usage_history`", @@ -1128,7 +1133,6 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: def emit_independent_looks_mcp( self, dashboard_element: LookerDashboardElement ) -> Iterable[MetadataWorkUnit]: - yield from auto_workunit( stream=self._make_chart_metadata_events( dashboard_element=dashboard_element, diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index fb4512893feb1..24145d60210ff 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -80,6 +80,7 @@ def remove_trailing_slash(cls, v): @config_class(MetabaseConfig) @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Supported by default") class MetabaseSource(Source): """ This plugin extracts Charts, dashboards, and associated metadata. 
This plugin is in beta and has only been tested diff --git a/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py index 1c0c809c16a60..f33c6e0edae3d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py @@ -23,11 +23,17 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, + capability, config_class, platform_name, support_status, ) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport +from datahub.ingestion.api.source import ( + MetadataWorkUnitProcessor, + Source, + SourceCapability, + SourceReport, +) from datahub.ingestion.api.source_helpers import ( auto_status_aspect, auto_workunit_reporter, @@ -121,6 +127,8 @@ def version_must_be_1(cls, v): @platform_name("File Based Lineage") @config_class(LineageFileSourceConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.LINEAGE_COARSE, "Specified in the lineage file.") +@capability(SourceCapability.LINEAGE_FINE, "Specified in the lineage file.") @dataclass class LineageFileSource(Source): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index a000c66a406c2..c46b56da422d9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -98,6 +98,7 @@ class HTTPError429(HTTPError): @config_class(ModeConfig) @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Supported by default") class ModeSource(Source): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index ac1e03812db3b..bc05edbb3c623 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -26,11 +26,12 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, + capability, config_class, platform_name, support_status, ) -from datahub.ingestion.api.source import Source, SourceReport +from datahub.ingestion.api.source import Source, SourceCapability, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.schema_classes import ( DataFlowInfoClass, @@ -360,6 +361,7 @@ def report_dropped(self, ent_name: str) -> None: @platform_name("NiFi", id="nifi") @config_class(NifiSourceConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.LINEAGE_COARSE, "Supported. 
See docs for limitations") class NifiSource(Source): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 52bcef66658c8..4611a8eed4782 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -264,7 +264,6 @@ def extract_lineage( ) if len(upstream) > 0: - upstream_lineage_class: UpstreamLineageClass = UpstreamLineageClass( upstreams=upstream, fineGrainedLineages=cll_lineage or None, @@ -1139,6 +1138,10 @@ def report_to_datahub_work_units( SourceCapability.OWNERSHIP, "Disabled by default, configured using `extract_ownership`", ) +@capability( + SourceCapability.LINEAGE_COARSE, + "Enabled by default, configured using `extract_lineage`.", +) @capability( SourceCapability.LINEAGE_FINE, "Disabled by default, configured using `extract_column_level_lineage`. ", diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py index bce4d1ec76e6e..fcf97e461967c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py @@ -20,11 +20,17 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, + capability, config_class, platform_name, support_status, ) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport +from datahub.ingestion.api.source import ( + MetadataWorkUnitProcessor, + Source, + SourceCapability, + SourceReport, +) from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.graph.client import DataHubGraph @@ -83,6 +89,8 @@ def compute_stats(self) -> None: @platform_name("SQL Queries") @config_class(SqlQueriesSourceConfig) @support_status(SupportStatus.TESTING) +@capability(SourceCapability.LINEAGE_COARSE, "Parsed from SQL queries") +@capability(SourceCapability.LINEAGE_FINE, "Parsed from SQL queries") class SqlQueriesSource(Source): # TODO: Documentation urns: Optional[Set[str]] diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 14bc4242d2a91..e491a1e8b82fa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -142,6 +142,7 @@ def get_filter_name(filter_obj): @capability( SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion" ) +@capability(SourceCapability.LINEAGE_COARSE, "Supported by default") class SupersetSource(StatefulIngestionSourceBase): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index bad7ae49d325e..4bc40b0aac964 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -452,6 +452,10 @@ class TableauSourceReport(StaleEntityRemovalSourceReport): @capability(SourceCapability.OWNERSHIP, "Requires recipe configuration") @capability(SourceCapability.TAGS, "Requires recipe configuration") @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") +@capability( + 
SourceCapability.LINEAGE_FINE, + "Enabled by default, configure using `extract_column_level_lineage`", +) class TableauSource(StatefulIngestionSourceBase): platform = "tableau" @@ -533,7 +537,7 @@ def fetch_projects(): path=[], ) # Set parent project name - for project_id, project in all_project_map.items(): + for _project_id, project in all_project_map.items(): if ( project.parent_id is not None and project.parent_id in all_project_map From 4d35a254cabb3a6241af8857c7d63298783ebaa7 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Fri, 20 Oct 2023 17:09:14 -0400 Subject: [PATCH 78/98] =?UTF-8?q?Added=20more=20data-testid=20usage=20for?= =?UTF-8?q?=20edit=5Fdocumentation=20and=20managing=5Fsecr=E2=80=A6=20(#90?= =?UTF-8?q?60)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../components/legacy/DescriptionModal.tsx | 6 ++- .../shared/components/styled/AddLinkModal.tsx | 6 ++- .../tabs/Documentation/DocumentationTab.tsx | 1 + .../components/DescriptionEditorToolbar.tsx | 2 +- .../app/ingest/secret/SecretBuilderModal.tsx | 4 ++ .../src/app/ingest/secret/SecretsList.tsx | 6 ++- .../e2e/glossary/glossary_navigation.js | 3 +- .../e2e/mutations/edit_documentation.js | 42 +++++++-------- .../cypress/e2e/mutations/managing_secrets.js | 51 ++++++++++--------- 9 files changed, 70 insertions(+), 51 deletions(-) diff --git a/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx b/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx index 579b8c9905da0..cb37c44a36caa 100644 --- a/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx +++ b/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx @@ -41,7 +41,11 @@ export default function UpdateDescriptionModal({ title, description, original, o footer={ <> - diff --git a/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx b/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx index 34d4f0cb3fe91..68a8cf4094362 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx @@ -57,7 +57,7 @@ export const AddLinkModal = ({ buttonProps, refetch }: AddLinkProps) => { return ( <> - { , - , ]} >
{ {
- diff --git a/datahub-web-react/src/app/ingest/secret/SecretBuilderModal.tsx b/datahub-web-react/src/app/ingest/secret/SecretBuilderModal.tsx index 539eef972608c..30f04d61b8fc9 100644 --- a/datahub-web-react/src/app/ingest/secret/SecretBuilderModal.tsx +++ b/datahub-web-react/src/app/ingest/secret/SecretBuilderModal.tsx @@ -40,6 +40,7 @@ export const SecretBuilderModal = ({ initialState, visible, onSubmit, onCancel } Cancel
diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js index f52e4d3984a88..aeceaf99be889 100644 --- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js +++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js @@ -17,8 +17,7 @@ describe("glossary sidebar navigation test", () => { cy.waitTextVisible("Created Term Group!"); cy.waitTextVisible("Create Glossary Term"); cy.enterTextInTestId("create-glossary-entity-modal-name", glossaryTerm); - cy.clickOptionWithTestId("glossary-entity-modal-create-button"); - cy.waitTextVisible("Created Glossary Term!"); + cy.clickOptionWithTestId("glossary-entity-modal-create-button").wait(3000); cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryTerm).click().wait(3000); cy.openThreeDotDropdown(); cy.clickOptionWithTestId("entity-menu-move-button") diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js index 83b66e2cb2549..5f9758a35ca0e 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js @@ -10,20 +10,20 @@ describe("edit documentation and link to dataset", () => { cy.visit( "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema" ); - cy.get("[role='tab']").contains("Documentation").click(); + cy.openEntityTab("Documentation"); cy.waitTextVisible("my hive dataset"); cy.waitTextVisible("Sample doc"); - cy.clickOptionWithText("Edit"); + cy.clickOptionWithTestId("edit-documentation-button"); cy.focused().clear(); cy.focused().type(documentation_edited); - cy.get("button").contains("Save").click(); + cy.clickOptionWithTestId("description-editor-save-button"); cy.waitTextVisible("Description Updated"); cy.waitTextVisible(documentation_edited); //return documentation to original state - cy.clickOptionWithText("Edit"); + cy.clickOptionWithTestId("edit-documentation-button"); cy.focused().clear().wait(1000); cy.focused().type("my hive dataset"); - cy.get("button").contains("Save").click(); + cy.clickOptionWithTestId("description-editor-save-button"); cy.waitTextVisible("Description Updated"); cy.waitTextVisible("my hive dataset"); }); @@ -33,21 +33,21 @@ describe("edit documentation and link to dataset", () => { cy.visit( "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema" ); - cy.get("[role='tab']").contains("Documentation").click(); + cy.openEntityTab("Documentation"); cy.contains("Sample doc").trigger("mouseover", { force: true }); cy.get('[data-icon="delete"]').click(); cy.waitTextVisible("Link Removed"); - cy.get("button").contains("Add Link").click().wait(1000); - cy.get('[role="dialog"] #addLinkForm_url').type(wrong_url); + cy.clickOptionWithTestId("add-link-button").wait(1000); + cy.enterTextInTestId("add-link-modal-url", wrong_url); cy.waitTextVisible("This field must be a valid url."); cy.focused().clear(); cy.waitTextVisible("A URL is required."); - cy.focused().type(correct_url); + cy.enterTextInTestId("add-link-modal-url", correct_url); cy.ensureTextNotPresent("This field must be a valid url."); - cy.get("#addLinkForm_label").type("Sample doc"); - cy.get('[role="dialog"] button').contains("Add").click(); + cy.enterTextInTestId("add-link-modal-label", "Sample doc"); + cy.clickOptionWithTestId("add-link-modal-add-button"); 
cy.waitTextVisible("Link Added"); - cy.get("[role='tab']").contains("Documentation").click(); + cy.openEntityTab("Documentation"); cy.get(`[href='${correct_url}']`).should("be.visible"); }); @@ -55,18 +55,18 @@ describe("edit documentation and link to dataset", () => { cy.loginWithCredentials(); cy.visit("/domain/urn:li:domain:marketing/Entities"); cy.waitTextVisible("SampleCypressKafkaDataset"); - cy.get("button").contains("Add Link").click().wait(1000); - cy.get('[role="dialog"] #addLinkForm_url').type(wrong_url); + cy.clickOptionWithTestId("add-link-button").wait(1000); + cy.enterTextInTestId("add-link-modal-url", wrong_url); cy.waitTextVisible("This field must be a valid url."); cy.focused().clear(); cy.waitTextVisible("A URL is required."); - cy.focused().type(correct_url); + cy.enterTextInTestId("add-link-modal-url", correct_url); cy.ensureTextNotPresent("This field must be a valid url."); - cy.get("#addLinkForm_label").type("Sample doc"); - cy.get('[role="dialog"] button').contains("Add").click(); + cy.enterTextInTestId("add-link-modal-label", "Sample doc"); + cy.clickOptionWithTestId("add-link-modal-add-button"); cy.waitTextVisible("Link Added"); - cy.get("[role='tab']").contains("Documentation").click(); - cy.waitTextVisible("Edit"); + cy.openEntityTab("Documentation"); + cy.get("[data-testid='edit-documentation-button']").should("be.visible"); cy.get(`[href='${correct_url}']`).should("be.visible"); cy.contains("Sample doc").trigger("mouseover", { force: true }); cy.get('[data-icon="delete"]').click(); @@ -83,14 +83,14 @@ describe("edit documentation and link to dataset", () => { cy.waitTextVisible("Foo field description has changed"); cy.focused().clear().wait(1000); cy.focused().type(documentation_edited); - cy.get("button").contains("Update").click(); + cy.clickOptionWithTestId("description-modal-update-button"); cy.waitTextVisible("Updated!"); cy.waitTextVisible(documentation_edited); cy.waitTextVisible("(edited)"); cy.get("tbody [data-icon='edit']").first().click({ force: true }); cy.focused().clear().wait(1000); cy.focused().type("Foo field description has changed"); - cy.get("button").contains("Update").click(); + cy.clickOptionWithTestId("description-modal-update-button"); cy.waitTextVisible("Updated!"); cy.waitTextVisible("Foo field description has changed"); cy.waitTextVisible("(edited)"); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js index 466bb2ef0757e..77fd63b9cae02 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js @@ -8,23 +8,24 @@ const ingestion_source_name = `ingestion source ${number}`; describe("managing secrets for ingestion creation", () => { it("create a secret, create ingestion source using a secret, remove a secret", () => { + // Navigate to the manage ingestion page → secrets cy.loginWithCredentials(); - //navigate to the manage ingestion page → secrets cy.goToIngestionPage(); - cy.clickOptionWithText("Secrets"); - //create a new secret - cy.clickOptionWithText("Create new secret"); - cy.get('[role="dialog"]').contains("Create a new Secret").should("be.visible"); - cy.get('[role="dialog"] #name').type(`secretname${number}`); - cy.get('[role="dialog"] #value').type(`secretvalue${number}`); - cy.get('[role="dialog"] #description').type(`secretdescription${number}`); - cy.get('#createSecretButton').click(); + cy.openEntityTab("Secrets"); + + // Create a new 
secret + cy.clickOptionWithTestId("create-secret-button"); + cy.enterTextInTestId('secret-modal-name-input', `secretname${number}`); + cy.enterTextInTestId('secret-modal-value-input', `secretvalue${number}`); + cy.enterTextInTestId('secret-modal-description-input', `secretdescription${number}`); + cy.clickOptionWithTestId("secret-modal-create-button"); cy.waitTextVisible("Successfully created Secret!"); cy.waitTextVisible(`secretname${number}`); - cy.waitTextVisible(`secretdescription${number}`).wait(5000)//prevent issue with missing secret - //create an ingestion source using a secret + cy.waitTextVisible(`secretdescription${number}`).wait(5000) + + // Create an ingestion source using a secret cy.goToIngestionPage(); - cy.clickOptionWithText("Create new source"); + cy.get("#ingestion-create-source").click(); cy.clickOptionWithText("Snowflake"); cy.waitTextVisible("Snowflake Recipe"); cy.get("#account_id").type(accound_id); @@ -40,11 +41,12 @@ describe("managing secrets for ingestion creation", () => { cy.waitTextVisible("Give this ingestion source a name."); cy.get('[data-testid="source-name-input"]').type(ingestion_source_name); cy.get("button").contains("Save").click(); - cy.waitTextVisible("Successfully created ingestion source!").wait(5000)//prevent issue with missing form data + cy.waitTextVisible("Successfully created ingestion source!").wait(5000) cy.waitTextVisible(ingestion_source_name); cy.get("button").contains("Pending...").should("be.visible"); - //remove a secret - cy.clickOptionWithText("Secrets"); + + // Remove a secret + cy.openEntityTab("Secrets"); cy.waitTextVisible(`secretname${number}`); cy.get('[data-icon="delete"]').first().click(); cy.waitTextVisible("Confirm Secret Removal"); @@ -52,14 +54,16 @@ describe("managing secrets for ingestion creation", () => { cy.waitTextVisible("Removed secret."); cy.ensureTextNotPresent(`secretname${number}`); cy.ensureTextNotPresent(`secretdescription${number}`); - //remove ingestion source + + // Remove ingestion source cy.goToIngestionPage(); cy.get('[data-testid="delete-button"]').first().click(); cy.waitTextVisible("Confirm Ingestion Source Removal"); cy.get("button").contains("Yes").click(); cy.waitTextVisible("Removed ingestion source."); cy.ensureTextNotPresent(ingestion_source_name) - //verify secret is not present during ingestion source creation for password dropdown + + // Verify secret is not present during ingestion source creation for password dropdown cy.clickOptionWithText("Create new source"); cy.clickOptionWithText("Snowflake"); cy.waitTextVisible("Snowflake Recipe"); @@ -68,13 +72,13 @@ describe("managing secrets for ingestion creation", () => { cy.get("#username").type(username); cy.get("#password").click().wait(1000); cy.ensureTextNotPresent(`secretname${number}`); - //verify secret can be added during ingestion source creation and used successfully + + // Verify secret can be added during ingestion source creation and used successfully cy.clickOptionWithText("Create Secret"); - cy.get('[role="dialog"]').contains("Create a new Secret").should("be.visible"); - cy.get('[role="dialog"] #name').type(`secretname${number}`); - cy.get('[role="dialog"] #value').type(`secretvalue${number}`); - cy.get('[role="dialog"] #description').type(`secretdescription${number}`); - cy.get('#createSecretButton').click(); + cy.enterTextInTestId('secret-modal-name-input', `secretname${number}`) + cy.enterTextInTestId('secret-modal-value-input', `secretvalue${number}`) + cy.enterTextInTestId('secret-modal-description-input', 
`secretdescription${number}`) + cy.clickOptionWithTestId("secret-modal-create-button"); cy.waitTextVisible("Created secret!"); cy.get("#role").type(role); cy.get("button").contains("Next").click(); @@ -86,6 +90,7 @@ describe("managing secrets for ingestion creation", () => { cy.waitTextVisible("Successfully created ingestion source!").wait(5000)//prevent issue with missing form data cy.waitTextVisible(ingestion_source_name); cy.get("button").contains("Pending...").should("be.visible"); + //Remove ingestion source and secret cy.goToIngestionPage(); cy.get('[data-testid="delete-button"]').first().click(); From 63599c95553b89304b656efb2c208c9084d60717 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Sat, 21 Oct 2023 03:17:28 -0700 Subject: [PATCH 79/98] fix(search): fix mapping builder bug (#9062) --- .../search/elasticsearch/indexbuilder/MappingsBuilder.java | 2 +- .../timeseries/search/TimeseriesAspectServiceTestBase.java | 6 ++++-- .../io/datahubproject/test/search/SearchTestContainer.java | 2 +- smoke-test/tests/containers/containers_test.py | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java index 1edc77bbd214c..35cef71edd953 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java @@ -133,7 +133,7 @@ private static Map getMappingsForField(@Nonnull final Searchable } else if (fieldType == FieldType.DATETIME) { mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE); } else if (fieldType == FieldType.OBJECT) { - mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE); + mappingForField.put(TYPE, ESUtils.OBJECT_FIELD_TYPE); } else { log.info("FieldType {} has no mappings implemented", fieldType); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index f9b8f84b10ad2..b19d2026fbfc4 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -889,15 +889,17 @@ public void testCountByFilterAfterDelete() throws InterruptedException { @Test(groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"}) public void testGetIndexSizes() { List result = _elasticSearchTimeseriesAspectService.getIndexSizes(); + //CHECKSTYLE:OFF /* Example result: {aspectName=testentityprofile, sizeMb=52.234, indexName=es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1, entityName=testentity} {aspectName=testentityprofile, sizeMb=0.208, indexName=es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1, entityName=testentitywithouttests} */ // There may be other indices in there from other tests, so just make sure that index for entity + aspect is in there - assertTrue(result.size() > 1); + //CHECKSTYLE:ON + assertTrue(result.size() > 0); assertTrue( result.stream().anyMatch(idxSizeResult -> idxSizeResult.getIndexName().equals( - 
"es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1"))); + "es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1"))); } } diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java index 67e1ee368f513..4c1555fc510e6 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java @@ -5,7 +5,7 @@ import java.time.Duration; public interface SearchTestContainer { - String SEARCH_JAVA_OPTS = "-Xms64m -Xmx384m -XX:MaxDirectMemorySize=368435456"; + String SEARCH_JAVA_OPTS = "-Xms446m -Xmx446m -XX:MaxDirectMemorySize=368435456"; Duration STARTUP_TIMEOUT = Duration.ofMinutes(5); // usually < 1min GenericContainer startContainer(); diff --git a/smoke-test/tests/containers/containers_test.py b/smoke-test/tests/containers/containers_test.py index 05a45239dabf8..227645a87d30a 100644 --- a/smoke-test/tests/containers/containers_test.py +++ b/smoke-test/tests/containers/containers_test.py @@ -227,6 +227,7 @@ def test_update_container(frontend_session, ingest_cleanup_data): "ownerUrn": new_owner, "resourceUrn": container_urn, "ownerEntityType": "CORP_USER", + "ownershipTypeUrn": "urn:li:ownershipType:__system__technical_owner" } }, } From 86e0023a4e158467130f7337478a48bf98fb344b Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Sat, 21 Oct 2023 16:20:59 +0100 Subject: [PATCH 80/98] feat(ingestion): Adds more advanced configurations for runtime debugging (#8998) --- .../ingest/IngestionResolverUtils.java | 10 ++ ...eateIngestionExecutionRequestResolver.java | 3 + .../source/UpsertIngestionSourceResolver.java | 10 ++ .../src/main/resources/ingestion.graphql | 10 ++ .../UpsertIngestionSourceResolverTest.java | 2 +- .../app/ingest/source/IngestionSourceList.tsx | 8 +- .../ingest/source/builder/NameSourceStep.tsx | 123 +++++++++++++++++- .../src/app/ingest/source/builder/types.ts | 17 +++ .../src/graphql/ingestion.graphql | 8 ++ docker/build.gradle | 12 +- docs/ui-ingestion.md | 20 ++- .../docs/dev_guides/profiling_ingestions.md | 39 ++++++ .../TimeseriesAspectServiceTestBase.java | 6 +- .../test/search/SearchTestContainer.java | 2 + .../ingestion/DataHubIngestionSourceInfo.pdl | 13 +- 15 files changed, 267 insertions(+), 16 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java index 7db0b6f826a04..1140c031f1d35 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java @@ -5,6 +5,7 @@ import com.linkedin.datahub.graphql.generated.IngestionConfig; import com.linkedin.datahub.graphql.generated.IngestionSchedule; import com.linkedin.datahub.graphql.generated.IngestionSource; +import com.linkedin.datahub.graphql.generated.StringMapEntry; import com.linkedin.datahub.graphql.generated.StructuredReport; import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; import com.linkedin.entity.EntityResponse; @@ -21,6 +22,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.stream.Collectors; import 
lombok.extern.slf4j.Slf4j; @@ -143,6 +145,14 @@ public static IngestionConfig mapIngestionSourceConfig(final DataHubIngestionSou result.setVersion(config.getVersion()); result.setExecutorId(config.getExecutorId()); result.setDebugMode(config.isDebugMode()); + if (config.getExtraArgs() != null) { + List extraArgs = config.getExtraArgs() + .keySet() + .stream() + .map(key -> new StringMapEntry(key, config.getExtraArgs().get(key))) + .collect(Collectors.toList()); + result.setExtraArgs(extraArgs); + } return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java index e5064e6620526..ea20b837e0a1f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java @@ -117,6 +117,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) if (ingestionSourceInfo.getConfig().hasDebugMode()) { debugMode = ingestionSourceInfo.getConfig().isDebugMode() ? "true" : "false"; } + if (ingestionSourceInfo.getConfig().hasExtraArgs()) { + arguments.putAll(ingestionSourceInfo.getConfig().getExtraArgs()); + } arguments.put(DEBUG_MODE_ARG_NAME, debugMode); execInput.setArgs(new StringMap(arguments)); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java index 2ce394ad5ba84..68e334bd976f8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java @@ -1,10 +1,12 @@ package com.linkedin.datahub.graphql.resolvers.ingest.source; import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.StringMap; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; +import com.linkedin.datahub.graphql.generated.StringMapEntryInput; import com.linkedin.datahub.graphql.generated.UpdateIngestionSourceConfigInput; import com.linkedin.datahub.graphql.generated.UpdateIngestionSourceInput; import com.linkedin.datahub.graphql.generated.UpdateIngestionSourceScheduleInput; @@ -17,6 +19,8 @@ import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import java.util.Map; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import java.net.URISyntaxException; @@ -108,6 +112,12 @@ private DataHubIngestionSourceConfig mapConfig(final UpdateIngestionSourceConfig if (input.getDebugMode() != null) { result.setDebugMode(input.getDebugMode()); } + if (input.getExtraArgs() != null) { + Map extraArgs = input.getExtraArgs() + .stream() + .collect(Collectors.toMap(StringMapEntryInput::getKey, StringMapEntryInput::getValue)); + result.setExtraArgs(new 
StringMap(extraArgs)); + } return result; } diff --git a/datahub-graphql-core/src/main/resources/ingestion.graphql b/datahub-graphql-core/src/main/resources/ingestion.graphql index 69c8aff124583..21f9fb2633119 100644 --- a/datahub-graphql-core/src/main/resources/ingestion.graphql +++ b/datahub-graphql-core/src/main/resources/ingestion.graphql @@ -332,6 +332,11 @@ type IngestionConfig { Advanced: Whether or not to run ingestion in debug mode """ debugMode: Boolean + + """ + Advanced: Extra arguments for the ingestion run. + """ + extraArgs: [StringMapEntry!] } """ @@ -483,6 +488,11 @@ input UpdateIngestionSourceConfigInput { Whether or not to run ingestion in debug mode """ debugMode: Boolean + + """ + Extra arguments for the ingestion run. + """ + extraArgs: [StringMapEntryInput!] } """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java index 2538accc694fb..16d8da9169a8f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java @@ -26,7 +26,7 @@ public class UpsertIngestionSourceResolverTest { "Test source", "mysql", "Test source description", new UpdateIngestionSourceScheduleInput("* * * * *", "UTC"), - new UpdateIngestionSourceConfigInput("my test recipe", "0.8.18", "executor id", false) + new UpdateIngestionSourceConfigInput("my test recipe", "0.8.18", "executor id", false, null) ); @Test diff --git a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx index 0e341a5ff3a79..13af19b0b6ac2 100644 --- a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx +++ b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx @@ -15,7 +15,7 @@ import { Message } from '../../shared/Message'; import TabToolbar from '../../entity/shared/components/styled/TabToolbar'; import { IngestionSourceBuilderModal } from './builder/IngestionSourceBuilderModal'; import { addToListIngestionSourcesCache, CLI_EXECUTOR_ID, removeFromListIngestionSourcesCache } from './utils'; -import { DEFAULT_EXECUTOR_ID, SourceBuilderState } from './builder/types'; +import { DEFAULT_EXECUTOR_ID, SourceBuilderState, StringMapEntryInput } from './builder/types'; import { IngestionSource, UpdateIngestionSourceInput } from '../../../types.generated'; import { SearchBar } from '../../search/SearchBar'; import { useEntityRegistry } from '../../useEntityRegistry'; @@ -173,6 +173,11 @@ export const IngestionSourceList = () => { setFocusSourceUrn(undefined); }; + const formatExtraArgs = (extraArgs): StringMapEntryInput[] => { + if (extraArgs === null || extraArgs === undefined) return []; + return extraArgs.map((entry) => ({ key: entry.key, value: entry.value })); + }; + const createOrUpdateIngestionSource = ( input: UpdateIngestionSourceInput, resetState: () => void, @@ -294,6 +299,7 @@ export const IngestionSourceList = () => { (recipeBuilderState.config?.executorId as string)) || DEFAULT_EXECUTOR_ID, debugMode: recipeBuilderState.config?.debugMode || false, + extraArgs: formatExtraArgs(recipeBuilderState.config?.extraArgs || []), }, schedule: recipeBuilderState.schedule && { interval: 
recipeBuilderState.schedule?.interval as string, diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx index 992ebff643c31..f4c048bcaf0d2 100644 --- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx @@ -1,7 +1,7 @@ import { Button, Checkbox, Collapse, Form, Input, Typography } from 'antd'; import React from 'react'; import styled from 'styled-components'; -import { SourceBuilderState, StepProps } from './types'; +import { SourceBuilderState, StepProps, StringMapEntryInput } from './types'; const ControlsContainer = styled.div` display: flex; @@ -13,6 +13,10 @@ const SaveButton = styled(Button)` margin-right: 15px; `; +const ExtraEnvKey = 'extra_env_vars'; +const ExtraReqKey = 'extra_pip_requirements'; +const ExtraPluginKey = 'extra_pip_plugins'; + export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) => { const setName = (stagedName: string) => { const newState: SourceBuilderState = { @@ -55,6 +59,90 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) updateState(newState); }; + const retrieveExtraEnvs = () => { + const extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const index: number = extraArgs.findIndex((entry) => entry.key === ExtraEnvKey) as number; + if (index > -1) { + return extraArgs[index].value; + } + return ''; + }; + + const setExtraEnvs = (envs: string) => { + let extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const indxOfEnvVars: number = extraArgs.findIndex((entry) => entry.key === ExtraEnvKey) as number; + const value = { key: ExtraEnvKey, value: envs }; + if (indxOfEnvVars > -1) { + extraArgs[indxOfEnvVars] = value; + } else { + extraArgs = [...extraArgs, value]; + } + const newState: SourceBuilderState = { + ...state, + config: { + ...state.config, + extraArgs, + }, + }; + updateState(newState); + }; + + const retrieveExtraDataHubPlugins = () => { + const extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const index: number = extraArgs.findIndex((entry) => entry.key === ExtraPluginKey) as number; + if (index > -1) { + return extraArgs[index].value; + } + return ''; + }; + + const setExtraDataHubPlugins = (plugins: string) => { + let extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const indxOfPlugins: number = extraArgs.findIndex((entry) => entry.key === ExtraPluginKey) as number; + const value = { key: ExtraPluginKey, value: plugins }; + if (indxOfPlugins > -1) { + extraArgs[indxOfPlugins] = value; + } else { + extraArgs = [...extraArgs, value]; + } + const newState: SourceBuilderState = { + ...state, + config: { + ...state.config, + extraArgs, + }, + }; + updateState(newState); + }; + + const retrieveExtraReqs = () => { + const extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const index: number = extraArgs.findIndex((entry) => entry.key === ExtraReqKey) as number; + if (index > -1) { + return extraArgs[index].value; + } + return ''; + }; + + const setExtraReqs = (reqs: string) => { + let extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? 
state.config?.extraArgs : []; + const indxOfReqs: number = extraArgs.findIndex((entry) => entry.key === ExtraReqKey) as number; + const value = { key: ExtraReqKey, value: reqs }; + if (indxOfReqs > -1) { + extraArgs[indxOfReqs] = value; + } else { + extraArgs = [...extraArgs, value]; + } + const newState: SourceBuilderState = { + ...state, + config: { + ...state.config, + extraArgs, + }, + }; + updateState(newState); + }; + const onClickCreate = (shouldRun?: boolean) => { if (state.name !== undefined && state.name.length > 0) { submit(shouldRun); @@ -116,6 +204,39 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) onChange={(event) => setDebugMode(event.target.checked)} />
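+                    {/* Descriptive note: the three "Advanced" inputs rendered below persist their
+                        values into config.extraArgs under the reserved keys extra_env_vars,
+                        extra_pip_plugins and extra_pip_requirements declared at the top of this
+                        file; those entries are later merged into the execution request arguments. */}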
+ Extra Enviroment Variables}> + + Advanced: Set extra environment variables to an ingestion execution + + setExtraEnvs(event.target.value)} + /> + + Extra DataHub plugins}> + + Advanced: Set extra DataHub plugins for an ingestion execution + + setExtraDataHubPlugins(event.target.value)} + /> + + Extra Pip Libraries}> + + Advanced: Add extra pip libraries for an ingestion execution + + setExtraReqs(event.target.value)} + /> + diff --git a/datahub-web-react/src/app/ingest/source/builder/types.ts b/datahub-web-react/src/app/ingest/source/builder/types.ts index cfe0f27ae7dbe..2df467b7beba1 100644 --- a/datahub-web-react/src/app/ingest/source/builder/types.ts +++ b/datahub-web-react/src/app/ingest/source/builder/types.ts @@ -34,6 +34,18 @@ export type StepProps = { ingestionSources: SourceConfig[]; }; +export type StringMapEntryInput = { + /** + * The key of the map entry + */ + key: string; + + /** + * The value fo the map entry + */ + value: string; +}; + /** * The object represents the state of the Ingestion Source Builder form. */ @@ -91,5 +103,10 @@ export interface SourceBuilderState { * Advanced: Whether or not to run this ingestion source in debug mode */ debugMode?: boolean | null; + + /** + * Advanced: Extra arguments for the ingestion run. + */ + extraArgs?: StringMapEntryInput[] | null; }; } diff --git a/datahub-web-react/src/graphql/ingestion.graphql b/datahub-web-react/src/graphql/ingestion.graphql index c127e9ec03f9a..1767fe34bfef0 100644 --- a/datahub-web-react/src/graphql/ingestion.graphql +++ b/datahub-web-react/src/graphql/ingestion.graphql @@ -12,6 +12,10 @@ query listIngestionSources($input: ListIngestionSourcesInput!) { version executorId debugMode + extraArgs { + key + value + } } schedule { interval @@ -51,6 +55,10 @@ query getIngestionSource($urn: String!, $runStart: Int, $runCount: Int) { version executorId debugMode + extraArgs { + key + value + } } schedule { interval diff --git a/docker/build.gradle b/docker/build.gradle index c8fdbc86b18b7..56634a5fe0c67 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -97,10 +97,20 @@ task quickstartDebug(type: Exec, dependsOn: ':metadata-ingestion:install') { dependsOn(debug_modules.collect { it + ':dockerTagDebug' }) shouldRunAfter ':metadata-ingestion:clean', 'quickstartNuke' - environment "DATAHUB_PRECREATE_TOPICS", "true" environment "DATAHUB_TELEMETRY_ENABLED", "false" environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}" + // Elastic + // environment "DATAHUB_SEARCH_IMAGE", 'elasticsearch' + // environment "DATAHUB_SEARCH_TAG", '7.10.1' + + // OpenSearch + environment "DATAHUB_SEARCH_IMAGE", 'opensearchproject/opensearch' + environment "DATAHUB_SEARCH_TAG", '2.9.0' + environment "XPACK_SECURITY_ENABLED", 'plugins.security.disabled=true' + environment "USE_AWS_ELASTICSEARCH", 'true' + + def cmd = [ 'source ../metadata-ingestion/venv/bin/activate && ', 'datahub docker quickstart', diff --git a/docs/ui-ingestion.md b/docs/ui-ingestion.md index db2007e1e19a9..438ddd8823b7e 100644 --- a/docs/ui-ingestion.md +++ b/docs/ui-ingestion.md @@ -1,5 +1,12 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Ingestion + + ## Introduction Starting in version `0.8.25`, DataHub supports creating, configuring, scheduling, & executing batch metadata ingestion using the DataHub user interface. This makes @@ -173,28 +180,29 @@ Finally, give your Ingestion Source a name. 
Once you're happy with your configurations, click 'Done' to save your changes. -##### Advanced: Running with a specific CLI version +##### Advanced ingestion configs: -DataHub comes pre-configured to use the latest version of the DataHub CLI ([acryl-datahub](https://pypi.org/project/acryl-datahub/)) that is compatible +DataHub's Managed Ingestion UI comes pre-configured to use the latest version of the DataHub CLI ([acryl-datahub](https://pypi.org/project/acryl-datahub/)) that is compatible with the server. However, you can override the default package version using the 'Advanced' source configurations. To do so, simply click 'Advanced', then change the 'CLI Version' text box to contain the exact version of the DataHub CLI you'd like to use. -

_Pinning the CLI version to version `0.8.23.2`_ +Other advanced options include specifying **environment variables**, **DataHub plugins** or **python packages at runtime**. + Once you're happy with your changes, simply click 'Done' to save. You can upload and even update recipes using the cli as mentioned in the [cli documentation for uploading ingestion recipes](./cli.md#ingest-deploy). -An example execution would look something like: +An example execution for a given `recipe.yaml` file, would look something like: ```bash datahub ingest deploy --name "My Test Ingestion Source" --schedule "5 * * * *" --time-zone "UTC" -c recipe.yaml @@ -330,8 +338,8 @@ for the `datahub-actions` container and running `docker logs `. There are valid cases for ingesting metadata without the UI-based ingestion scheduler. For example, - You have written a custom ingestion Source -- Your data sources are not reachable on the network where DataHub is deployed -- Your ingestion source requires context from a local filesystem (e.g. input files, environment variables, etc) +- Your data sources are not reachable on the network where DataHub is deployed. Managed DataHub users can use a [remote executor](managed-datahub/operator-guide/setting-up-remote-ingestion-executor-on-aws.md) for remote UI-based ingestion. +- Your ingestion source requires context from a local filesystem (e.g. input files) - You want to distribute metadata ingestion among multiple producers / environments ### How do I attach policies to the actions pod to give it permissions to pull metadata from various sources? diff --git a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md index d876d99b494f8..77cc2f456aa2d 100644 --- a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md +++ b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md @@ -13,6 +13,35 @@ This page documents how to perform memory profiles of ingestion runs. It is useful when trying to size the amount of resources necessary to ingest some source or when developing new features or sources. ## How to use + + + + +Create an ingestion as specified in the [Ingestion guide](../../../docs/ui-ingestion.md). + +Add a flag to your ingestion recipe to generate a memray memory dump of your ingestion: +```yaml +source: + ... + +sink: + ... + +flags: + generate_memory_profiles: "" +``` + +In the final panel, under the advanced section, add the `debug` datahub package under the **Extra DataHub Plugins** section. +As seen below: + +

+ +

+ +Finally, save and run the ingestion process. + +
+ Install the `debug` plugin for DataHub's CLI wherever the ingestion runs: ```bash @@ -33,6 +62,16 @@ flags: generate_memory_profiles: "" ``` +Finally run the ingestion recipe + +```bash +$ datahub ingest -c recipe.yaml +``` + + +
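+As a rough sketch (assuming the `memray` package used to produce these dumps is installed in the same environment), the generated dump can then be rendered into an HTML flamegraph for inspection; the file name below is only a placeholder for the dump created by your run:
+
+```bash
+memray flamegraph file-<your-run-id>.bin
+```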
+ + Once the ingestion run starts a binary file will be created and appended to during the execution of the ingestion. These files follow the pattern `file-.bin` for a unique identification. diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index b19d2026fbfc4..1362a0f69eff2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -892,8 +892,10 @@ public void testGetIndexSizes() { //CHECKSTYLE:OFF /* Example result: - {aspectName=testentityprofile, sizeMb=52.234, indexName=es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1, entityName=testentity} - {aspectName=testentityprofile, sizeMb=0.208, indexName=es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1, entityName=testentitywithouttests} + {aspectName=testentityprofile, sizeMb=52.234, + indexName=es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1, entityName=testentity} + {aspectName=testentityprofile, sizeMb=0.208, + indexName=es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1, entityName=testentitywithouttests} */ // There may be other indices in there from other tests, so just make sure that index for entity + aspect is in there //CHECKSTYLE:ON diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java index 4c1555fc510e6..34aa6978f742f 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java @@ -5,7 +5,9 @@ import java.time.Duration; public interface SearchTestContainer { + String SEARCH_JAVA_OPTS = "-Xms446m -Xmx446m -XX:MaxDirectMemorySize=368435456"; + Duration STARTUP_TIMEOUT = Duration.ofMinutes(5); // usually < 1min GenericContainer startContainer(); diff --git a/metadata-models/src/main/pegasus/com/linkedin/ingestion/DataHubIngestionSourceInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/ingestion/DataHubIngestionSourceInfo.pdl index b3e237202fc2f..f777b5d6e12e7 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/ingestion/DataHubIngestionSourceInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/ingestion/DataHubIngestionSourceInfo.pdl @@ -37,10 +37,10 @@ record DataHubIngestionSourceInfo { * Parameters associated with the Ingestion Source */ config: record DataHubIngestionSourceConfig { - /** - * The JSON recipe to use for ingestion - */ - recipe: string + /** + * The JSON recipe to use for ingestion + */ + recipe: string /** * The PyPI version of the datahub CLI to use when executing a recipe @@ -56,5 +56,10 @@ record DataHubIngestionSourceInfo { * Whether or not to run this ingestion source in debug mode */ debugMode: optional boolean + + /** + * Extra arguments for the ingestion run. 
+ */ + extraArgs: optional map[string, string] } } \ No newline at end of file From 04216e30bb2a1eab1993e48276d9f8e52d6b3121 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Sat, 21 Oct 2023 17:21:54 +0200 Subject: [PATCH 81/98] feat(ingest/s3): S3 add partition to schema (#8900) Co-authored-by: Pedro Silva --- .../src/datahub/ingestion/source/s3/config.py | 5 ++- .../src/datahub/ingestion/source/s3/source.py | 33 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py index 9b5296f0b9dd5..3ef6476078f6f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py @@ -75,7 +75,10 @@ class DataLakeSourceConfig( default=100, description="Maximum number of rows to use when inferring schemas for TSV and CSV files.", ) - + add_partition_columns_to_schema: bool = Field( + default=False, + description="Whether to add partition fields to the schema.", + ) verify_ssl: Union[bool, str] = Field( default=True, description="Either a boolean, in which case it controls whether we verify the server's TLS certificate, or a string, in which case it must be a path to a CA bundle to use.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index eb49fcbb268c0..94c571eabad11 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -78,6 +78,7 @@ NullTypeClass, NumberTypeClass, RecordTypeClass, + SchemaField, SchemaFieldDataType, SchemaMetadata, StringTypeClass, @@ -90,6 +91,7 @@ OperationClass, OperationTypeClass, OtherSchemaClass, + SchemaFieldDataTypeClass, _Aspect, ) from datahub.telemetry import stats, telemetry @@ -458,8 +460,39 @@ def get_fields(self, table_data: TableData, path_spec: PathSpec) -> List: logger.debug(f"Extracted fields in schema: {fields}") fields = sorted(fields, key=lambda f: f.fieldPath) + if self.source_config.add_partition_columns_to_schema: + self.add_partition_columns_to_schema( + fields=fields, path_spec=path_spec, full_path=table_data.full_path + ) + return fields + def add_partition_columns_to_schema( + self, path_spec: PathSpec, full_path: str, fields: List[SchemaField] + ) -> None: + is_fieldpath_v2 = False + for field in fields: + if field.fieldPath.startswith("[version=2.0]"): + is_fieldpath_v2 = True + break + vars = path_spec.get_named_vars(full_path) + if vars is not None and "partition_key" in vars: + for partition_key in vars["partition_key"].values(): + fields.append( + SchemaField( + fieldPath=f"{partition_key}" + if not is_fieldpath_v2 + else f"[version=2.0].[type=string].{partition_key}", + nativeDataType="string", + type=SchemaFieldDataType(StringTypeClass()) + if not is_fieldpath_v2 + else SchemaFieldDataTypeClass(type=StringTypeClass()), + isPartitioningKey=True, + nullable=True, + recursive=False, + ) + ) + def get_table_profile( self, table_data: TableData, dataset_urn: str ) -> Iterable[MetadataWorkUnit]: From 633e6d6f779dc1e09bdfd10a160889684c485706 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Mon, 23 Oct 2023 17:15:44 +0100 Subject: [PATCH 82/98] feat(frontend): Remove debug flag from start script (#9075) --- docker/datahub-frontend/start.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/datahub-frontend/start.sh b/docker/datahub-frontend/start.sh 
index 430982aa2456b..9dc1514144bb1 100755 --- a/docker/datahub-frontend/start.sh +++ b/docker/datahub-frontend/start.sh @@ -50,7 +50,6 @@ export JAVA_OPTS="-Xms512m \ -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf \ -Dlogback.configurationFile=datahub-frontend/conf/logback.xml \ -Dlogback.debug=false \ - -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005 \ ${PROMETHEUS_AGENT:-} ${OTEL_AGENT:-} \ ${TRUSTSTORE_FILE:-} ${TRUSTSTORE_TYPE:-} ${TRUSTSTORE_PASSWORD:-} \ ${HTTP_PROXY:-} ${HTTPS_PROXY:-} ${NO_PROXY:-} \ From 8fb95e88a17260d0d6727f4d5e09636b128faf47 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 23 Oct 2023 12:40:42 -0700 Subject: [PATCH 83/98] feat(sqlparser): parse create DDL statements (#9002) --- .../goldens/v2_sqlite_operator.json | 162 +++++++++++++++--- .../v2_sqlite_operator_no_dag_listener.json | 162 +++++++++++++++--- .../datahub/emitter/sql_parsing_builder.py | 9 +- .../testing/check_sql_parser_result.py | 9 + .../src/datahub/utilities/sqlglot_lineage.py | 92 ++++++++-- .../test_bigquery_create_view_with_cte.json | 8 +- ..._bigquery_from_sharded_table_wildcard.json | 4 +- .../test_bigquery_nested_subqueries.json | 4 +- ..._bigquery_sharded_table_normalization.json | 4 +- .../test_bigquery_star_with_replace.json | 6 +- .../test_bigquery_view_from_union.json | 4 +- .../goldens/test_create_table_ddl.json | 55 +++++- .../goldens/test_create_view_as_select.json | 2 +- .../test_select_from_struct_subfields.json | 2 +- .../test_select_with_full_col_name.json | 2 +- .../test_teradata_default_normalization.json | 2 + 16 files changed, 430 insertions(+), 97 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json index 1a32b38ce055d..81d0a71b651d9 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json @@ -74,9 +74,7 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": 
\"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", @@ -98,7 +96,44 @@ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], "inputDatajobs": [], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -157,7 +192,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:24.632190+00:00", + "start_date": "2023-10-15 20:29:10.262813+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -172,7 +207,7 @@ "name": "sqlite_operator_create_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696056984632, + "time": 1697401750262, "actor": "urn:li:corpuser:datahub" } } @@ -221,7 +256,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056984632, + "timestampMillis": 1697401750262, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -251,9 +286,7 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", 
\"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", @@ -275,7 +308,80 @@ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], "inputDatajobs": [], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -331,7 +437,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056984947, + "timestampMillis": 1697401750651, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -447,7 +553,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:28.605901+00:00", + "start_date": "2023-10-15 20:29:15.013834+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -462,7 +568,7 @@ "name": "sqlite_operator_populate_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696056988605, + "time": 1697401755013, "actor": "urn:li:corpuser:datahub" } } @@ -511,7 +617,7 @@ 
"aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056988605, + "timestampMillis": 1697401755013, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -621,7 +727,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056989098, + "timestampMillis": 1697401755600, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -807,7 +913,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:32.888165+00:00", + "start_date": "2023-10-15 20:29:20.216818+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -822,7 +928,7 @@ "name": "sqlite_operator_transform_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696056992888, + "time": 1697401760216, "actor": "urn:li:corpuser:datahub" } } @@ -895,7 +1001,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056992888, + "timestampMillis": 1697401760216, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1131,7 +1237,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056993744, + "timestampMillis": 1697401761237, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1249,7 +1355,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:37.745717+00:00", + "start_date": "2023-10-15 20:29:26.243934+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -1264,7 +1370,7 @@ "name": "sqlite_operator_cleanup_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696056997745, + "time": 1697401766243, "actor": "urn:li:corpuser:datahub" } } @@ -1313,7 +1419,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056997745, + "timestampMillis": 1697401766243, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1425,7 +1531,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056998672, + "timestampMillis": 1697401767373, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1543,7 +1649,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:42.645806+00:00", + "start_date": "2023-10-15 20:29:32.075613+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -1558,7 +1664,7 @@ "name": "sqlite_operator_cleanup_processed_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057002645, + "time": 1697401772075, "actor": "urn:li:corpuser:datahub" } } @@ -1607,7 +1713,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057002645, + "timestampMillis": 1697401772075, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1719,7 +1825,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057003759, + "timestampMillis": 1697401773454, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json 
b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json index c082be693e30c..96a0f02ccec17 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json @@ -74,9 +74,7 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", @@ -98,7 +96,44 @@ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], "inputDatajobs": [], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -157,7 +192,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": 
"2023-09-30 07:00:45.832554+00:00", + "start_date": "2023-10-15 20:27:26.883178+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -172,7 +207,7 @@ "name": "sqlite_operator_create_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057245832, + "time": 1697401646883, "actor": "urn:li:corpuser:datahub" } } @@ -221,7 +256,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057245832, + "timestampMillis": 1697401646883, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -251,9 +286,7 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", @@ -275,7 +308,80 @@ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], "inputDatajobs": [], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -331,7 +437,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057246734, + "timestampMillis": 1697401647826, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -502,7 +608,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:00:49.653938+00:00", + "start_date": "2023-10-15 20:27:31.398799+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -517,7 +623,7 @@ "name": "sqlite_operator_populate_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057249653, + "time": 1697401651398, "actor": "urn:li:corpuser:datahub" } } @@ -566,7 +672,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057249653, + "timestampMillis": 1697401651398, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -676,7 +782,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057250831, + "timestampMillis": 1697401652651, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -917,7 +1023,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:00:53.989264+00:00", + "start_date": "2023-10-15 20:27:37.697995+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -932,7 +1038,7 @@ "name": "sqlite_operator_transform_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057253989, + "time": 1697401657697, "actor": "urn:li:corpuser:datahub" } } @@ -1005,7 +1111,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057253989, + "timestampMillis": 1697401657697, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1241,7 +1347,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057255628, + "timestampMillis": 1697401659496, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1414,7 +1520,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:01:00.421177+00:00", + "start_date": "2023-10-15 20:27:45.670215+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -1429,7 +1535,7 @@ "name": 
"sqlite_operator_cleanup_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057260421, + "time": 1697401665670, "actor": "urn:li:corpuser:datahub" } } @@ -1478,7 +1584,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057260421, + "timestampMillis": 1697401665670, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1590,7 +1696,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057262258, + "timestampMillis": 1697401667670, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1763,7 +1869,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:01:05.540192+00:00", + "start_date": "2023-10-15 20:27:51.559194+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -1778,7 +1884,7 @@ "name": "sqlite_operator_cleanup_processed_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057265540, + "time": 1697401671559, "actor": "urn:li:corpuser:datahub" } } @@ -1827,7 +1933,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057265540, + "timestampMillis": 1697401671559, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1939,7 +2045,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057267631, + "timestampMillis": 1697401673788, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" diff --git a/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py b/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py index 071d590f270f8..dedcfa0385f75 100644 --- a/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py +++ b/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py @@ -179,15 +179,16 @@ def add_lineage( def gen_workunits(self) -> Iterable[MetadataWorkUnit]: if self.generate_lineage: - yield from self._gen_lineage_workunits() + for mcp in self._gen_lineage_mcps(): + yield mcp.as_workunit() if self.generate_usage_statistics: yield from self._gen_usage_statistics_workunits() - def _gen_lineage_workunits(self) -> Iterable[MetadataWorkUnit]: + def _gen_lineage_mcps(self) -> Iterable[MetadataChangeProposalWrapper]: for downstream_urn in self._lineage_map: upstreams: List[UpstreamClass] = [] fine_upstreams: List[FineGrainedLineageClass] = [] - for upstream_urn, edge in self._lineage_map[downstream_urn].items(): + for edge in self._lineage_map[downstream_urn].values(): upstreams.append(edge.gen_upstream_aspect()) fine_upstreams.extend(edge.gen_fine_grained_lineage_aspects()) @@ -201,7 +202,7 @@ def _gen_lineage_workunits(self) -> Iterable[MetadataWorkUnit]: ) yield MetadataChangeProposalWrapper( entityUrn=downstream_urn, aspect=upstream_lineage - ).as_workunit() + ) def _gen_usage_statistics_workunits(self) -> Iterable[MetadataWorkUnit]: yield from self._usage_aggregator.generate_workunits( diff --git a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py index b3b1331db768b..2b610947e9043 100644 --- a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py +++ b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py @@ -24,6 +24,7 @@ def assert_sql_result_with_resolver( *, expected_file: pathlib.Path, schema_resolver: SchemaResolver, + allow_table_error: 
bool = False, **kwargs: Any, ) -> None: # HACK: Our BigQuery source overwrites this value and doesn't undo it. @@ -36,6 +37,14 @@ def assert_sql_result_with_resolver( **kwargs, ) + if res.debug_info.table_error: + if allow_table_error: + logger.info( + f"SQL parser table error: {res.debug_info.table_error}", + exc_info=res.debug_info.table_error, + ) + else: + raise res.debug_info.table_error if res.debug_info.column_error: logger.warning( f"SQL parser column error: {res.debug_info.column_error}", diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index c830ec8c02fd4..97121b368f507 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -241,9 +241,9 @@ class SqlParsingResult(_ParserBaseModel): ) -def _parse_statement(sql: str, dialect: str) -> sqlglot.Expression: - statement = sqlglot.parse_one( - sql, read=dialect, error_level=sqlglot.ErrorLevel.RAISE +def _parse_statement(sql: sqlglot.exp.ExpOrStr, dialect: str) -> sqlglot.Expression: + statement: sqlglot.Expression = sqlglot.maybe_parse( + sql, dialect=dialect, error_level=sqlglot.ErrorLevel.RAISE ) return statement @@ -467,14 +467,20 @@ def _column_level_lineage( # noqa: C901 default_db: Optional[str], default_schema: Optional[str], ) -> List[_ColumnLineageInfo]: - if not isinstance( - statement, - _SupportedColumnLineageTypesTuple, + is_create_ddl = _is_create_table_ddl(statement) + if ( + not isinstance( + statement, + _SupportedColumnLineageTypesTuple, + ) + and not is_create_ddl ): raise UnsupportedStatementTypeError( f"Can only generate column-level lineage for select-like inner statements, not {type(statement)}" ) + column_lineage: List[_ColumnLineageInfo] = [] + use_case_insensitive_cols = dialect in { # Column identifiers are case-insensitive in BigQuery, so we need to # do a normalization step beforehand to make sure it's resolved correctly. @@ -580,6 +586,38 @@ def _schema_aware_fuzzy_column_resolve( ) from e logger.debug("Qualified sql %s", statement.sql(pretty=True, dialect=dialect)) + # Handle the create DDL case. + if is_create_ddl: + assert ( + output_table is not None + ), "output_table must be set for create DDL statements" + + create_schema: sqlglot.exp.Schema = statement.this + sqlglot_columns = create_schema.expressions + + for column_def in sqlglot_columns: + if not isinstance(column_def, sqlglot.exp.ColumnDef): + # Ignore things like constraints. + continue + + output_col = _schema_aware_fuzzy_column_resolve( + output_table, column_def.name + ) + output_col_type = column_def.args.get("kind") + + column_lineage.append( + _ColumnLineageInfo( + downstream=_DownstreamColumnRef( + table=output_table, + column=output_col, + column_type=output_col_type, + ), + upstreams=[], + ) + ) + + return column_lineage + # Try to figure out the types of the output columns. try: statement = sqlglot.optimizer.annotate_types.annotate_types( @@ -589,8 +627,6 @@ def _schema_aware_fuzzy_column_resolve( # This is not a fatal error, so we can continue. 
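
The create-DDL branch added above bypasses the SELECT-based lineage path entirely: it walks the `Schema` node of the `CREATE` statement and emits each `ColumnDef` as a downstream column with no upstreams. A rough sketch of what that traversal sees, written against the public sqlglot API (illustrative only, not part of the patch; the table mirrors the `test_create_table_ddl` golden):

```python
import sqlglot
from sqlglot import exp

statement = sqlglot.parse_one(
    "CREATE TABLE costs (id INTEGER, month TEXT, total_cost REAL, area REAL)",
    read="sqlite",
)

# A "create table DDL" here means a Create node whose `this` is a Schema.
assert isinstance(statement, exp.Create) and isinstance(statement.this, exp.Schema)

for column_def in statement.this.expressions:
    if not isinstance(column_def, exp.ColumnDef):
        continue  # constraints and other non-column entries are skipped
    # The column name and its declared type (the `kind` arg) become the
    # downstream column and its native type; upstreams stay empty.
    print(column_def.name, column_def.args.get("kind"))
```
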
logger.debug("sqlglot failed to annotate types: %s", e) - column_lineage = [] - try: assert isinstance(statement, _SupportedColumnLineageTypesTuple) @@ -599,7 +635,6 @@ def _schema_aware_fuzzy_column_resolve( (select_col.alias_or_name, select_col) for select_col in statement.selects ] logger.debug("output columns: %s", [col[0] for col in output_columns]) - output_col: str for output_col, original_col_expression in output_columns: if output_col == "*": # If schema information is available, the * will be expanded to the actual columns. @@ -628,7 +663,7 @@ def _schema_aware_fuzzy_column_resolve( # Generate SELECT lineage. # Using a set here to deduplicate upstreams. - direct_col_upstreams: Set[_ColumnRef] = set() + direct_raw_col_upstreams: Set[_ColumnRef] = set() for node in lineage_node.walk(): if node.downstream: # We only want the leaf nodes. @@ -643,8 +678,9 @@ def _schema_aware_fuzzy_column_resolve( if node.subfield: normalized_col = f"{normalized_col}.{node.subfield}" - col = _schema_aware_fuzzy_column_resolve(table_ref, normalized_col) - direct_col_upstreams.add(_ColumnRef(table=table_ref, column=col)) + direct_raw_col_upstreams.add( + _ColumnRef(table=table_ref, column=normalized_col) + ) else: # This branch doesn't matter. For example, a count(*) column would go here, and # we don't get any column-level lineage for that. @@ -665,7 +701,16 @@ def _schema_aware_fuzzy_column_resolve( if original_col_expression.type: output_col_type = original_col_expression.type - if not direct_col_upstreams: + # Fuzzy resolve upstream columns. + direct_resolved_col_upstreams = { + _ColumnRef( + table=edge.table, + column=_schema_aware_fuzzy_column_resolve(edge.table, edge.column), + ) + for edge in direct_raw_col_upstreams + } + + if not direct_resolved_col_upstreams: logger.debug(f' "{output_col}" has no upstreams') column_lineage.append( _ColumnLineageInfo( @@ -674,12 +719,12 @@ def _schema_aware_fuzzy_column_resolve( column=output_col, column_type=output_col_type, ), - upstreams=sorted(direct_col_upstreams), + upstreams=sorted(direct_resolved_col_upstreams), # logic=column_logic.sql(pretty=True, dialect=dialect), ) ) - # TODO: Also extract referenced columns (e.g. 
non-SELECT lineage) + # TODO: Also extract referenced columns (aka auxillary / non-SELECT lineage) except (sqlglot.errors.OptimizeError, ValueError) as e: raise SqlUnderstandingError( f"sqlglot failed to compute some lineage: {e}" @@ -700,6 +745,12 @@ def _extract_select_from_create( return statement +def _is_create_table_ddl(statement: sqlglot.exp.Expression) -> bool: + return isinstance(statement, sqlglot.exp.Create) and isinstance( + statement.this, sqlglot.exp.Schema + ) + + def _try_extract_select( statement: sqlglot.exp.Expression, ) -> sqlglot.exp.Expression: @@ -766,6 +817,7 @@ def _translate_sqlglot_type( def _translate_internal_column_lineage( table_name_urn_mapping: Dict[_TableName, str], raw_column_lineage: _ColumnLineageInfo, + dialect: str, ) -> ColumnLineageInfo: downstream_urn = None if raw_column_lineage.downstream.table: @@ -779,7 +831,9 @@ def _translate_internal_column_lineage( ) if raw_column_lineage.downstream.column_type else None, - native_column_type=raw_column_lineage.downstream.column_type.sql() + native_column_type=raw_column_lineage.downstream.column_type.sql( + dialect=dialect + ) if raw_column_lineage.downstream.column_type and raw_column_lineage.downstream.column_type.this != sqlglot.exp.DataType.Type.UNKNOWN @@ -800,12 +854,14 @@ def _get_dialect(platform: str) -> str: # TODO: convert datahub platform names to sqlglot dialect if platform == "presto-on-hive": return "hive" + if platform == "mssql": + return "tsql" else: return platform def _sqlglot_lineage_inner( - sql: str, + sql: sqlglot.exp.ExpOrStr, schema_resolver: SchemaResolver, default_db: Optional[str] = None, default_schema: Optional[str] = None, @@ -918,7 +974,7 @@ def _sqlglot_lineage_inner( if column_lineage: column_lineage_urns = [ _translate_internal_column_lineage( - table_name_urn_mapping, internal_col_lineage + table_name_urn_mapping, internal_col_lineage, dialect=dialect ) for internal_col_lineage in column_lineage ] diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json index f0175b4dc8892..d610b0a83f229 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json @@ -18,7 +18,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -36,7 +36,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -54,7 +54,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -72,7 +72,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json index b7df5444987f2..2d3d188d28316 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json @@ -14,7 +14,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - 
"native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -32,7 +32,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json index 67e306bebf545..41ae0885941b0 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json @@ -14,7 +14,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -32,7 +32,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json index b7df5444987f2..2d3d188d28316 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json @@ -14,7 +14,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -32,7 +32,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json index b393b2445d6c4..26f8f8f59a3ff 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json @@ -16,7 +16,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -34,7 +34,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -52,7 +52,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json index 53fb94300e804..83365c09f69c2 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json @@ -17,7 +17,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -39,7 +39,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json index 
4773974545bfa..cf31b71cb50f6 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json @@ -4,5 +4,58 @@ "out_tables": [ "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)" ], - "column_lineage": null + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)", + "column": "id", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INTEGER" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)", + "column": "month", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)", + "column": "total_cost", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "REAL" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)", + "column": "area", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "REAL" + }, + "upstreams": [] + } + ] } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json index ff452467aa5bd..8a6b60d0f1bde 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json @@ -30,7 +30,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "native_column_type": "BIGINT" + "native_column_type": "NUMBER" }, "upstreams": [] }, diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json index 5ad847e252497..2424fcda34752 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json @@ -14,7 +14,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "native_column_type": "DECIMAL" + "native_column_type": "NUMERIC" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json index 6ee3d2e61c39b..8dd2633eff612 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json @@ -14,7 +14,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "native_column_type": "DECIMAL" + "native_column_type": "NUMERIC" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json index b0351a7e07ad2..ee80285d87f60 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json +++ 
b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json @@ -12,6 +12,7 @@ "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)", "column": "PatientId", + "column_type": null, "native_column_type": "INTEGER()" }, "upstreams": [ @@ -25,6 +26,7 @@ "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)", "column": "BMI", + "column_type": null, "native_column_type": "FLOAT()" }, "upstreams": [ From 10456c5e3cdaad14927b89bb9deee1a6df0ce92c Mon Sep 17 00:00:00 2001 From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com> Date: Mon, 23 Oct 2023 14:53:07 -0700 Subject: [PATCH 84/98] docs(ingest): update to get_workunits_internal (#9054) --- metadata-ingestion/adding-source.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/adding-source.md b/metadata-ingestion/adding-source.md index e4fc950a7cdbd..a0930102c6827 100644 --- a/metadata-ingestion/adding-source.md +++ b/metadata-ingestion/adding-source.md @@ -62,7 +62,7 @@ Some sources use the default `SourceReport` class, but others inherit and extend ### 3. Implement the source itself -The core for the source is the `get_workunits` method, which produces a stream of metadata events (typically MCP objects) wrapped up in a MetadataWorkUnit. +The core for the source is the `get_workunits_internal` method, which produces a stream of metadata events (typically MCP objects) wrapped up in a MetadataWorkUnit. The [file source](./src/datahub/ingestion/source/file.py) is a good and simple example. The MetadataChangeEventClass is defined in the metadata models which are generated From a0ce4f333e1cbbc544a650ec3e8012a1f10aef2b Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Mon, 23 Oct 2023 21:21:21 -0400 Subject: [PATCH 85/98] Column level lineage and path test (#8822) --- .../preview/EntityPaths/EntityPathsModal.tsx | 1 + .../e2e/lineage/lineage_column_path.js | 68 +++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js diff --git a/datahub-web-react/src/app/preview/EntityPaths/EntityPathsModal.tsx b/datahub-web-react/src/app/preview/EntityPaths/EntityPathsModal.tsx index d5722429aaf6b..2bb76714d6119 100644 --- a/datahub-web-react/src/app/preview/EntityPaths/EntityPathsModal.tsx +++ b/datahub-web-react/src/app/preview/EntityPaths/EntityPathsModal.tsx @@ -39,6 +39,7 @@ export default function EntityPathsModal({ paths, resultEntityUrn, hideModal }: return ( Column path{paths.length > 1 && 's'} from{' '} diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js new file mode 100644 index 0000000000000..37ca62c8d1229 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js @@ -0,0 +1,68 @@ +import { aliasQuery } from "../utils"; +const DATASET_ENTITY_TYPE = 'dataset'; +const DATASET_URN = 'urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)'; +const DOWNSTREAM_DATASET_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)"; +const upstreamColumn = '[data-testid="node-urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)-Upstream"] text'; +const downstreamColumn = 
'[data-testid="node-urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)-Downstream"] text'; + +const verifyColumnPathModal = (from, to) => { + cy.get('[data-testid="entity-paths-modal"]').contains(from).should("be.visible"); + cy.get('[data-testid="entity-paths-modal"]').contains(to).should("be.visible"); +}; + +describe("column-Level lineage and impact analysis path test", () => { + beforeEach(() => { + cy.on('uncaught:exception', (err, runnable) => { return false; }); + cy.intercept("POST", "/api/v2/graphql", (req) => { + aliasQuery(req, "appConfig"); + }); + }); + + it("verify column-level lineage path at lineage praph and impact analysis ", () => { + // Open dataset with column-level lineage configured an navigate to lineage tab -> visualize lineage + cy.loginWithCredentials(); + cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, DATASET_URN); + + // Enable “show columns” toggle + cy.waitTextVisible("SampleCypressHdfs"); + cy.clickOptionWithTestId("column-toggle"); + cy.waitTextVisible("shipment_info"); + + // Verify functionality of column lineage + cy.get(upstreamColumn).eq(3).click(); + cy.get(upstreamColumn).eq(3).prev().should('not.have.attr', 'fill', 'white'); + cy.get(downstreamColumn).eq(2).prev().should('not.have.attr', 'stroke', 'transparent'); + cy.get(downstreamColumn).eq(2).click(); + cy.get(downstreamColumn).eq(2).prev().should('not.have.attr', 'fill', 'white'); + cy.get(upstreamColumn).eq(3).prev().should('not.have.attr', 'stroke', 'transparent'); + + // Open dataset impact analysis view, enable column lineage + cy.goToDataset(DATASET_URN, "SampleCypressHdfsDataset"); + cy.openEntityTab("Lineage"); + cy.clickOptionWithText("Column Lineage"); + cy.clickOptionWithText("Downstream"); + + // Verify upstream column lineage, test column path modal + cy.clickOptionWithText("Upstream"); + cy.waitTextVisible("SampleCypressKafkaDataset"); + cy.ensureTextNotPresent("field_bar"); + cy.contains("Select column").click({ force: true}).wait(1000); + cy.get(".rc-virtual-list").contains("shipment_info").click(); + cy.waitTextVisible("field_bar"); + cy.clickOptionWithText("field_bar"); + verifyColumnPathModal("shipment_info", "field_bar"); + cy.get('[data-testid="entity-paths-modal"] [data-icon="close"]').click(); + + // Verify downstream column lineage, test column path modal + cy.goToDataset(DOWNSTREAM_DATASET_URN, "SampleCypressKafkaDataset"); + cy.openEntityTab("Lineage"); + cy.clickOptionWithText("Column Lineage"); + cy.ensureTextNotPresent("shipment_info"); + cy.contains("Select column").click({ force: true}).wait(1000); + cy.get(".rc-virtual-list").contains("field_bar").click(); + cy.waitTextVisible("shipment_info"); + cy.clickOptionWithText("shipment_info"); + verifyColumnPathModal("shipment_info", "field_bar"); + cy.get('[data-testid="entity-paths-modal"] [data-icon="close"]').click(); + }); +}); \ No newline at end of file From adf8c8db38c56250cb612b208f6e59b04c7258c6 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 24 Oct 2023 02:59:56 -0400 Subject: [PATCH 86/98] refactor(ingest): Move sqlalchemy import out of sql_types.py (#9065) --- .../src/datahub/ingestion/source/sql/athena.py | 2 +- .../src/datahub/ingestion/source/sql/sql_common.py | 2 +- .../src/datahub/ingestion/source/sql/sql_types.py | 9 +-------- .../src/datahub/utilities/sqlalchemy_type_converter.py | 6 +++++- metadata-ingestion/tests/unit/test_athena_source.py | 2 +- .../unit/utilities/test_sqlalchemy_type_converter.py | 2 +- 6 files changed, 10 insertions(+), 13 deletions(-) diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index dad61e5173166..06b9ad92677a2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -31,7 +31,6 @@ register_custom_type, ) from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, make_sqlalchemy_uri -from datahub.ingestion.source.sql.sql_types import MapType from datahub.ingestion.source.sql.sql_utils import ( add_table_to_schema_container, gen_database_container, @@ -41,6 +40,7 @@ from datahub.metadata.schema_classes import RecordTypeClass from datahub.utilities.hive_schema_to_avro import get_avro_schema_for_hive_column from datahub.utilities.sqlalchemy_type_converter import ( + MapType, get_schema_fields_for_sqlalchemy_column, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 6524eea8222d4..be03858ec3ef9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -37,7 +37,6 @@ DatasetSubTypes, ) from datahub.ingestion.source.sql.sql_config import SQLCommonConfig -from datahub.ingestion.source.sql.sql_types import MapType from datahub.ingestion.source.sql.sql_utils import ( add_table_to_schema_container, downgrade_schema_from_v2, @@ -91,6 +90,7 @@ from datahub.utilities.lossy_collections import LossyList from datahub.utilities.registries.domain_registry import DomainRegistry from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport +from datahub.utilities.sqlalchemy_type_converter import MapType if TYPE_CHECKING: from datahub.ingestion.source.ge_data_profiler import ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py index 51626891e9fef..ae47623188f42 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py @@ -1,8 +1,6 @@ import re from typing import Any, Dict, ValuesView -from sqlalchemy import types - from datahub.metadata.com.linkedin.pegasus2avro.schema import ( ArrayType, BooleanType, @@ -17,6 +15,7 @@ TimeType, UnionType, ) +from datahub.utilities.sqlalchemy_type_converter import MapType # these can be obtained by running `select format_type(oid, null),* from pg_type;` # we've omitted the types without a meaningful DataHub type (e.g. postgres-specific types, index vectors, etc.) 
@@ -369,12 +368,6 @@ def resolve_vertica_modified_type(type_string: str) -> Any: "array": ArrayType, } - -class MapType(types.TupleType): - # Wrapper class around SQLalchemy's TupleType to increase compatibility with DataHub - pass - - # https://docs.aws.amazon.com/athena/latest/ug/data-types.html # https://github.com/dbt-athena/dbt-athena/tree/main ATHENA_SQL_TYPES_MAP: Dict[str, Any] = { diff --git a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py index a431f262a85fd..1d5ec5dae3519 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py +++ b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py @@ -7,13 +7,17 @@ from sqlalchemy_bigquery import STRUCT from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields -from datahub.ingestion.source.sql.sql_types import MapType from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField from datahub.metadata.schema_classes import NullTypeClass, SchemaFieldDataTypeClass logger = logging.getLogger(__name__) +class MapType(types.TupleType): + # Wrapper class around SQLalchemy's TupleType to increase compatibility with DataHub + pass + + class SqlAlchemyColumnToAvroConverter: """Helper class that collects some methods to convert SQLalchemy columns to Avro schema.""" diff --git a/metadata-ingestion/tests/unit/test_athena_source.py b/metadata-ingestion/tests/unit/test_athena_source.py index 6d3ed20eafde2..23dd7dd5a6e45 100644 --- a/metadata-ingestion/tests/unit/test_athena_source.py +++ b/metadata-ingestion/tests/unit/test_athena_source.py @@ -9,7 +9,7 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.aws.s3_util import make_s3_urn from datahub.ingestion.source.sql.athena import CustomAthenaRestDialect -from datahub.ingestion.source.sql.sql_types import MapType +from datahub.utilities.sqlalchemy_type_converter import MapType FROZEN_TIME = "2020-04-14 07:00:00" diff --git a/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py b/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py index 959da0987a825..6c719d351c4c2 100644 --- a/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py +++ b/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py @@ -3,7 +3,6 @@ from sqlalchemy import types from sqlalchemy_bigquery import STRUCT -from datahub.ingestion.source.sql.sql_types import MapType from datahub.metadata.schema_classes import ( ArrayTypeClass, MapTypeClass, @@ -12,6 +11,7 @@ RecordTypeClass, ) from datahub.utilities.sqlalchemy_type_converter import ( + MapType, get_schema_fields_for_sqlalchemy_column, ) From c849246e63284bc73768ed58a22be62b708a6c48 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 24 Oct 2023 00:09:41 -0700 Subject: [PATCH 87/98] fix(ingest): add releases link (#9014) --- metadata-ingestion/setup.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index c46409ecbf52f..417588a433655 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -282,7 +282,8 @@ # Source plugins # sqlalchemy-bigquery is included here since it provides an implementation of # a SQLalchemy-conform STRUCT type definition - "athena": sql_common | {"PyAthena[SQLAlchemy]>=2.6.0,<3.0.0", "sqlalchemy-bigquery>=1.4.1"}, + "athena": sql_common + | 
{"PyAthena[SQLAlchemy]>=2.6.0,<3.0.0", "sqlalchemy-bigquery>=1.4.1"}, "azure-ad": set(), "bigquery": sql_common | bigquery_common @@ -354,7 +355,11 @@ | {"psycopg2-binary", "pymysql>=1.0.2"}, "pulsar": {"requests"}, "redash": {"redash-toolbelt", "sql-metadata"} | sqllineage_lib, - "redshift": sql_common | redshift_common | usage_common | sqlglot_lib | {"redshift-connector"}, + "redshift": sql_common + | redshift_common + | usage_common + | sqlglot_lib + | {"redshift-connector"}, "redshift-legacy": sql_common | redshift_common, "redshift-usage-legacy": sql_common | usage_common | redshift_common, "s3": {*s3_base, *data_lake_profiling}, @@ -435,7 +440,9 @@ deepdiff_dep = "deepdiff" test_api_requirements = {pytest_dep, deepdiff_dep, "PyYAML"} -debug_requirements = {"memray"} +debug_requirements = { + "memray", +} base_dev_requirements = { *base_requirements, @@ -668,6 +675,7 @@ "Documentation": "https://datahubproject.io/docs/", "Source": "https://github.com/datahub-project/datahub", "Changelog": "https://github.com/datahub-project/datahub/releases", + "Releases": "https://github.com/acryldata/datahub/releases", }, license="Apache License 2.0", description="A CLI to work with DataHub metadata", From eb0b03d2f2f2c9ce88562c32d968d095a59f8547 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 24 Oct 2023 10:45:09 -0400 Subject: [PATCH 88/98] fix(ingest/bigquery): Correctly apply table pattern to read events; fix end time calculation; deprecate match_fully_qualified_names (#9077) --- .../ingestion/source/bigquery_v2/bigquery_config.py | 7 +++---- .../datahub/ingestion/source/bigquery_v2/lineage.py | 2 +- .../src/datahub/ingestion/source/bigquery_v2/usage.py | 10 +++++++--- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 944814b6936a4..a6a740385cf5c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -119,8 +119,8 @@ class BigQueryV2Config( ) match_fully_qualified_names: bool = Field( - default=False, - description="Whether `dataset_pattern` is matched against fully qualified dataset name `.`.", + default=True, + description="[deprecated] Whether `dataset_pattern` is matched against fully qualified dataset name `.`.", ) include_external_url: bool = Field( @@ -327,8 +327,7 @@ def backward_compatibility_configs_set(cls, values: Dict) -> Dict: ): logger.warning( "Please update `dataset_pattern` to match against fully qualified schema name `.` and set config `match_fully_qualified_names : True`." - "Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. " - "The config option `match_fully_qualified_names` will be deprecated in future and the default behavior will assume `match_fully_qualified_names: True`." + "The config option `match_fully_qualified_names` is deprecated and will be removed in a future release." 
) return values diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index 98c8cbaf85eec..aa462435b8105 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -548,7 +548,7 @@ def _get_parsed_audit_log_events(self, project_id: str) -> Iterable[QueryEvent]: # handle the case where the read happens within our time range but the query # completion event is delayed and happens after the configured end time. corrected_start_time = self.start_time - self.config.max_query_duration - corrected_end_time = self.end_time + -self.config.max_query_duration + corrected_end_time = self.end_time + self.config.max_query_duration self.report.log_entry_start_time = corrected_start_time self.report.log_entry_end_time = corrected_end_time diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py index 201567e104a51..7fc38991e5928 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py @@ -335,8 +335,12 @@ def get_time_window(self) -> Tuple[datetime, datetime]: def _is_table_allowed(self, table_ref: Optional[BigQueryTableRef]) -> bool: return ( table_ref is not None - and self.config.dataset_pattern.allowed(table_ref.table_identifier.dataset) - and self.config.table_pattern.allowed(table_ref.table_identifier.table) + and self.config.dataset_pattern.allowed( + f"{table_ref.table_identifier.project_id}.{table_ref.table_identifier.dataset}" + if self.config.match_fully_qualified_names + else table_ref.table_identifier.dataset + ) + and self.config.table_pattern.allowed(str(table_ref.table_identifier)) ) def _should_ingest_usage(self) -> bool: @@ -844,7 +848,7 @@ def _get_parsed_bigquery_log_events( # handle the case where the read happens within our time range but the query # completion event is delayed and happens after the configured end time. 
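
The end-time change repeated in the lineage and usage hunks here fixes a sign slip: `end_time + -max_query_duration` pulled the audit window's end backwards instead of padding it forward to catch delayed query-completion events. A quick illustration with arbitrary values (not taken from the code):

```python
from datetime import datetime, timedelta

end_time = datetime(2023, 10, 24, 12, 0)
max_query_duration = timedelta(minutes=15)

# Old expression: the unary minus moved the window end back to 11:45, so
# completion events arriving shortly after end_time were silently dropped.
assert end_time + -max_query_duration == datetime(2023, 10, 24, 11, 45)

# Fixed expression: pad the window forward to 12:15 instead.
assert end_time + max_query_duration == datetime(2023, 10, 24, 12, 15)
```
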
corrected_start_time = self.start_time - self.config.max_query_duration - corrected_end_time = self.end_time + -self.config.max_query_duration + corrected_end_time = self.end_time + self.config.max_query_duration self.report.audit_start_time = corrected_start_time self.report.audit_end_time = corrected_end_time From d13553f53ad9e7592256cd88e78eef0ca95832e4 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 24 Oct 2023 12:24:50 -0700 Subject: [PATCH 89/98] feat(sqlparser): extract CLL from `update`s (#9078) --- .../src/datahub/utilities/sqlglot_lineage.py | 68 +++++++++++-- .../test_snowflake_update_from_table.json | 56 +++++++++++ .../test_snowflake_update_hardcoded.json | 35 +++++++ .../unit/sql_parsing/test_sqlglot_lineage.py | 96 +++++++++++++++++++ 4 files changed, 246 insertions(+), 9 deletions(-) create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 97121b368f507..526d90b2a1bfa 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -745,6 +745,47 @@ def _extract_select_from_create( return statement +_UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT: Set[str] = set( + sqlglot.exp.Update.arg_types.keys() +) - set(sqlglot.exp.Select.arg_types.keys()) + + +def _extract_select_from_update( + statement: sqlglot.exp.Update, +) -> sqlglot.exp.Select: + statement = statement.copy() + + # The "SET" expressions need to be converted. + # For the update command, it'll be a list of EQ expressions, but the select + # should contain aliased columns. + new_expressions = [] + for expr in statement.expressions: + if isinstance(expr, sqlglot.exp.EQ) and isinstance( + expr.left, sqlglot.exp.Column + ): + new_expressions.append( + sqlglot.exp.Alias( + this=expr.right, + alias=expr.left.this, + ) + ) + else: + # If we don't know how to convert it, just leave it as-is. If this causes issues, + # they'll get caught later. + new_expressions.append(expr) + + return sqlglot.exp.Select( + **{ + **{ + k: v + for k, v in statement.args.items() + if k not in _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT + }, + "expressions": new_expressions, + } + ) + + def _is_create_table_ddl(statement: sqlglot.exp.Expression) -> bool: return isinstance(statement, sqlglot.exp.Create) and isinstance( statement.this, sqlglot.exp.Schema @@ -767,6 +808,9 @@ def _try_extract_select( elif isinstance(statement, sqlglot.exp.Insert): # TODO Need to map column renames in the expressions part of the statement. statement = statement.expression + elif isinstance(statement, sqlglot.exp.Update): + # Assumption: the output table is already captured in the modified tables list. + statement = _extract_select_from_update(statement) elif isinstance(statement, sqlglot.exp.Create): # TODO May need to map column renames. # Assumption: the output table is already captured in the modified tables list. @@ -942,19 +986,25 @@ def _sqlglot_lineage_inner( ) # Simplify the input statement for column-level lineage generation. 
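
The `_extract_select_from_update` helper added above rewrites each `SET col = <expr>` pair into `<expr> AS col`, so an UPDATE can flow through the existing SELECT-oriented column-lineage walk unchanged. A minimal sketch of that idea against the public sqlglot API (the statement mirrors the new `test_snowflake_update_from_table` case; the printed mapping is only illustrative):

```python
import sqlglot
from sqlglot import exp

update = sqlglot.parse_one(
    "UPDATE my_table SET col1 = t1.col1 || t1.col2 "
    "FROM table1 t1 WHERE my_table.id = t1.id",
    read="snowflake",
)
assert isinstance(update, exp.Update)

# Each SET item parses as an EQ node; the helper converts it into an Alias,
# which is why the rewritten statement reads like
#   SELECT t1.col1 || t1.col2 AS col1 FROM table1 AS t1 WHERE my_table.id = t1.id
for assignment in update.expressions:
    if isinstance(assignment, exp.EQ) and isinstance(assignment.left, exp.Column):
        print(assignment.left.sql(), "<-", assignment.right.sql(dialect="snowflake"))
```
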
- select_statement = _try_extract_select(statement) + try: + select_statement = _try_extract_select(statement) + except Exception as e: + logger.debug(f"Failed to extract select from statement: {e}", exc_info=True) + debug_info.column_error = e + select_statement = None # Generate column-level lineage. column_lineage: Optional[List[_ColumnLineageInfo]] = None try: - column_lineage = _column_level_lineage( - select_statement, - dialect=dialect, - input_tables=table_name_schema_mapping, - output_table=downstream_table, - default_db=default_db, - default_schema=default_schema, - ) + if select_statement is not None: + column_lineage = _column_level_lineage( + select_statement, + dialect=dialect, + input_tables=table_name_schema_mapping, + output_table=downstream_table, + default_db=default_db, + default_schema=default_schema, + ) except UnsupportedStatementTypeError as e: # Inject details about the outer statement type too. e.args = (f"{e.args[0]} (outer statement type: {type(statement)})",) diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json new file mode 100644 index 0000000000000..e2baa34e7fe28 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json @@ -0,0 +1,56 @@ +{ + "query_type": "UPDATE", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)" + ], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)" + ], + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)", + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "column": "col1" + }, + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "column": "col2" + } + ] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)", + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "column": "col1" + }, + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)", + "column": "col2" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json new file mode 100644 index 0000000000000..b41ed61b37cdb --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json @@ -0,0 +1,35 @@ +{ + "query_type": "UPDATE", + "in_tables": [], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)" + ], + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "orderkey", + "column_type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INT" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "totalprice", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INT" + }, + "upstreams": [] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index 059add8db67e4..dfc5b486abd35 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -3,6 +3,7 @@ import pytest from datahub.testing.check_sql_parser_result import assert_sql_result +from datahub.utilities.sqlglot_lineage import _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT RESOURCE_DIR = pathlib.Path(__file__).parent / "goldens" @@ -672,3 +673,98 @@ def test_teradata_default_normalization(): }, expected_file=RESOURCE_DIR / "test_teradata_default_normalization.json", ) + + +def test_snowflake_update_hardcoded(): + assert_sql_result( + """ +UPDATE snowflake_sample_data.tpch_sf1.orders +SET orderkey = 1, totalprice = 2 +WHERE orderkey = 3 +""", + dialect="snowflake", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)": { + "orderkey": "NUMBER(38,0)", + "totalprice": "NUMBER(12,2)", + }, + }, + expected_file=RESOURCE_DIR / "test_snowflake_update_hardcoded.json", + ) + + +def test_update_from_select(): + assert _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT == {"returning", "this"} + + +def test_snowflake_update_from_table(): + # Can create these tables with the following SQL: + """ + -- Create or replace my_table + CREATE OR REPLACE TABLE my_table ( + id INT IDENTITY PRIMARY KEY, + col1 VARCHAR(50), + col2 VARCHAR(50) + ); + + -- Create or replace table1 + CREATE OR REPLACE TABLE table1 ( + id INT IDENTITY PRIMARY KEY, + col1 VARCHAR(50), + col2 VARCHAR(50) + ); + + -- Create or replace table2 + CREATE OR REPLACE TABLE table2 ( + id INT IDENTITY PRIMARY KEY, + col2 VARCHAR(50) + ); + + -- Insert data into my_table + INSERT INTO my_table (col1, col2) + VALUES ('foo', 'bar'), + ('baz', 'qux'); + + -- Insert data into table1 + INSERT INTO table1 (col1, col2) + VALUES ('foo', 'bar'), + ('baz', 'qux'); + + -- Insert data into table2 + INSERT INTO table2 (col2) + VALUES ('bar'), + ('qux'); + """ + + assert_sql_result( + """ +UPDATE my_table +SET + col1 = t1.col1 || t1.col2, + col2 = t1.col1 || t2.col2 +FROM table1 t1 +JOIN table2 t2 ON t1.id = t2.id +WHERE my_table.id = t1.id; +""", + dialect="snowflake", + default_db="my_db", + default_schema="my_schema", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)": { + "id": "NUMBER(38,0)", + "col1": "VARCHAR(16777216)", + "col2": "VARCHAR(16777216)", + }, + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)": { + "id": "NUMBER(38,0)", + "col1": "VARCHAR(16777216)", + "col2": "VARCHAR(16777216)", + }, + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)": { + "id": "NUMBER(38,0)", + "col1": "VARCHAR(16777216)", + "col2": "VARCHAR(16777216)", + }, + }, + expected_file=RESOURCE_DIR / "test_snowflake_update_from_table.json", + ) From 378d84a346cff4061f795dd1b296bde3ea5313c1 Mon Sep 17 00:00:00 2001 From: skrydal Date: Tue, 24 Oct 2023 22:12:11 
+0200 Subject: [PATCH 90/98] fix(ui): Fixes handling of resources filters in UI (#9087) --- .../app/permissions/policy/PolicyDetailsModal.tsx | 4 ++-- .../permissions/policy/PolicyPrivilegeForm.tsx | 15 ++++++--------- .../src/app/permissions/policy/policyUtils.ts | 4 ++-- docs/authorization/policies.md | 8 ++++---- metadata-ingestion/tests/unit/serde/test_serde.py | 8 ++++---- 5 files changed, 18 insertions(+), 21 deletions(-) diff --git a/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx b/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx index 68e91983babdb..d3e01df3a66e8 100644 --- a/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx +++ b/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx @@ -67,8 +67,8 @@ export default function PolicyDetailsModal({ policy, visible, onClose, privilege const isMetadataPolicy = policy?.type === PolicyType.Metadata; const resources = convertLegacyResourceFilter(policy?.resources); - const resourceTypes = getFieldValues(resources?.filter, 'RESOURCE_TYPE') || []; - const resourceEntities = getFieldValues(resources?.filter, 'RESOURCE_URN') || []; + const resourceTypes = getFieldValues(resources?.filter, 'TYPE') || []; + const resourceEntities = getFieldValues(resources?.filter, 'URN') || []; const domains = getFieldValues(resources?.filter, 'DOMAIN') || []; const { diff --git a/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx b/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx index 1520388a5033a..b8e1505fceaec 100644 --- a/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx +++ b/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx @@ -67,8 +67,8 @@ export default function PolicyPrivilegeForm({ } = useAppConfig(); const resources: ResourceFilter = convertLegacyResourceFilter(maybeResources) || EMPTY_POLICY.resources; - const resourceTypes = getFieldValues(resources.filter, 'RESOURCE_TYPE') || []; - const resourceEntities = getFieldValues(resources.filter, 'RESOURCE_URN') || []; + const resourceTypes = getFieldValues(resources.filter, 'TYPE') || []; + const resourceEntities = getFieldValues(resources.filter, 'URN') || []; const getDisplayName = (entity) => { if (!entity) { @@ -145,10 +145,7 @@ export default function PolicyPrivilegeForm({ }; setResources({ ...resources, - filter: setFieldValues(filter, 'RESOURCE_TYPE', [ - ...resourceTypes, - createCriterionValue(selectedResourceType), - ]), + filter: setFieldValues(filter, 'TYPE', [...resourceTypes, createCriterionValue(selectedResourceType)]), }); }; @@ -160,7 +157,7 @@ export default function PolicyPrivilegeForm({ ...resources, filter: setFieldValues( filter, - 'RESOURCE_TYPE', + 'TYPE', resourceTypes?.filter((criterionValue) => criterionValue.value !== deselectedResourceType), ), }); @@ -173,7 +170,7 @@ export default function PolicyPrivilegeForm({ }; setResources({ ...resources, - filter: setFieldValues(filter, 'RESOURCE_URN', [ + filter: setFieldValues(filter, 'URN', [ ...resourceEntities, createCriterionValueWithEntity( resource, @@ -192,7 +189,7 @@ export default function PolicyPrivilegeForm({ ...resources, filter: setFieldValues( filter, - 'RESOURCE_URN', + 'URN', resourceEntities?.filter((criterionValue) => criterionValue.value !== resource), ), }); diff --git a/datahub-web-react/src/app/permissions/policy/policyUtils.ts b/datahub-web-react/src/app/permissions/policy/policyUtils.ts index c7af7342f6efa..2f178fcdeb5c3 100644 --- 
a/datahub-web-react/src/app/permissions/policy/policyUtils.ts +++ b/datahub-web-react/src/app/permissions/policy/policyUtils.ts @@ -99,10 +99,10 @@ export const convertLegacyResourceFilter = (resourceFilter: Maybe(); if (resourceFilter.type) { - criteria.push(createCriterion('RESOURCE_TYPE', [createCriterionValue(resourceFilter.type)])); + criteria.push(createCriterion('TYPE', [createCriterionValue(resourceFilter.type)])); } if (resourceFilter.resources && resourceFilter.resources.length > 0) { - criteria.push(createCriterion('RESOURCE_URN', resourceFilter.resources.map(createCriterionValue))); + criteria.push(createCriterion('URN', resourceFilter.resources.map(createCriterionValue))); } return { filter: { diff --git a/docs/authorization/policies.md b/docs/authorization/policies.md index e3606f2a3e48d..63aa6688d3eec 100644 --- a/docs/authorization/policies.md +++ b/docs/authorization/policies.md @@ -137,7 +137,7 @@ We currently support the following: #### Resources Resource filter defines the set of resources that the policy applies to is defined using a list of criteria. Each -criterion defines a field type (like resource_type, resource_urn, domain), a list of field values to compare, and a +criterion defines a field type (like type, urn, domain), a list of field values to compare, and a condition (like EQUALS). It essentially checks whether the field of a certain resource matches any of the input values. Note, that if there are no criteria or resource is not set, policy is applied to ALL resources. @@ -149,7 +149,7 @@ For example, the following resource filter will apply the policy to datasets, ch "filter": { "criteria": [ { - "field": "RESOURCE_TYPE", + "field": "TYPE", "condition": "EQUALS", "values": [ "dataset", @@ -175,8 +175,8 @@ Supported fields are as follows | Field Type | Description | Example | |---------------|------------------------|-------------------------| -| resource_type | Type of the resource | dataset, chart, dataJob | -| resource_urn | Urn of the resource | urn:li:dataset:... | +| type | Type of the resource | dataset, chart, dataJob | +| urn | Urn of the resource | urn:li:dataset:... | | domain | Domain of the resource | urn:li:domain:domainX | ## Managing Policies diff --git a/metadata-ingestion/tests/unit/serde/test_serde.py b/metadata-ingestion/tests/unit/serde/test_serde.py index d116f1f5473fa..d2d6a0bdda5b9 100644 --- a/metadata-ingestion/tests/unit/serde/test_serde.py +++ b/metadata-ingestion/tests/unit/serde/test_serde.py @@ -238,7 +238,7 @@ def test_missing_optional_simple() -> None: "criteria": [ { "condition": "EQUALS", - "field": "RESOURCE_TYPE", + "field": "TYPE", "values": ["notebook", "dataset", "dashboard"], } ] @@ -252,7 +252,7 @@ def test_missing_optional_simple() -> None: "criteria": [ { "condition": "EQUALS", - "field": "RESOURCE_TYPE", + "field": "TYPE", "values": ["notebook", "dataset", "dashboard"], } ] @@ -267,13 +267,13 @@ def test_missing_optional_simple() -> None: def test_missing_optional_in_union() -> None: # This one doesn't contain any optional fields and should work fine. 
revised_json = json.loads( - '{"lastUpdatedTimestamp":1662356745807,"actors":{"groups":[],"resourceOwners":false,"allUsers":true,"allGroups":false,"users":[]},"privileges":["EDIT_ENTITY_ASSERTIONS","EDIT_DATASET_COL_GLOSSARY_TERMS","EDIT_DATASET_COL_TAGS","EDIT_DATASET_COL_DESCRIPTION"],"displayName":"customtest","resources":{"filter":{"criteria":[{"field":"RESOURCE_TYPE","condition":"EQUALS","values":["notebook","dataset","dashboard"]}]},"allResources":false},"description":"","state":"ACTIVE","type":"METADATA"}' + '{"lastUpdatedTimestamp":1662356745807,"actors":{"groups":[],"resourceOwners":false,"allUsers":true,"allGroups":false,"users":[]},"privileges":["EDIT_ENTITY_ASSERTIONS","EDIT_DATASET_COL_GLOSSARY_TERMS","EDIT_DATASET_COL_TAGS","EDIT_DATASET_COL_DESCRIPTION"],"displayName":"customtest","resources":{"filter":{"criteria":[{"field":"TYPE","condition":"EQUALS","values":["notebook","dataset","dashboard"]}]},"allResources":false},"description":"","state":"ACTIVE","type":"METADATA"}' ) revised = models.DataHubPolicyInfoClass.from_obj(revised_json) # This one is missing the optional filters.allResources field. original_json = json.loads( - '{"privileges":["EDIT_ENTITY_ASSERTIONS","EDIT_DATASET_COL_GLOSSARY_TERMS","EDIT_DATASET_COL_TAGS","EDIT_DATASET_COL_DESCRIPTION"],"actors":{"resourceOwners":false,"groups":[],"allGroups":false,"allUsers":true,"users":[]},"lastUpdatedTimestamp":1662356745807,"displayName":"customtest","description":"","resources":{"filter":{"criteria":[{"field":"RESOURCE_TYPE","condition":"EQUALS","values":["notebook","dataset","dashboard"]}]}},"state":"ACTIVE","type":"METADATA"}' + '{"privileges":["EDIT_ENTITY_ASSERTIONS","EDIT_DATASET_COL_GLOSSARY_TERMS","EDIT_DATASET_COL_TAGS","EDIT_DATASET_COL_DESCRIPTION"],"actors":{"resourceOwners":false,"groups":[],"allGroups":false,"allUsers":true,"users":[]},"lastUpdatedTimestamp":1662356745807,"displayName":"customtest","description":"","resources":{"filter":{"criteria":[{"field":"TYPE","condition":"EQUALS","values":["notebook","dataset","dashboard"]}]}},"state":"ACTIVE","type":"METADATA"}' ) original = models.DataHubPolicyInfoClass.from_obj(original_json) From edb82ad91fba8a401c56b82bc4c2916a39a6a6dd Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 24 Oct 2023 18:56:14 -0400 Subject: [PATCH 91/98] docs(ingest/bigquery): Add docs for breaking change: match_fully_qualified_names (#9094) --- docs/how/updating-datahub.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 3af3b2bdda215..7d8c25b06255a 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -11,11 +11,17 @@ This file documents any backwards-incompatible changes in DataHub and assists pe by Looker and LookML source connectors. - #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details. - #8853 - Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`. -- #8943 The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. +- #8943 - The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. 
This is currently enabled by default to preserve compatibility, but will be disabled by default and then removed in the future. If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup. Otherwise, we recommend soft deleting all databricks data via the DataHub CLI: `datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`. +- #9077 - The BigQuery ingestion source by default sets `match_fully_qualified_names: true`. +This means that any `dataset_pattern` or `schema_pattern` specified will be matched on the fully +qualified dataset name, i.e. `.`. If this is not the case, please +update your pattern (e.g. prepend your old dataset pattern with `.*\.` which matches the project part), +or set `match_fully_qualified_names: false` in your recipe. However, note that +setting this to `false` is deprecated and this flag will be removed entirely in a future release. ### Potential Downtime From fe18532b29e35af1cd3007e6affc102042b1af61 Mon Sep 17 00:00:00 2001 From: skrydal Date: Wed, 25 Oct 2023 00:58:56 +0200 Subject: [PATCH 92/98] docs(update): Added info on breaking change for policies (#9093) Co-authored-by: Pedro Silva --- docs/how/updating-datahub.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 7d8c25b06255a..57193ea69f2be 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -16,6 +16,39 @@ This is currently enabled by default to preserve compatibility, but will be disa If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup. Otherwise, we recommend soft deleting all databricks data via the DataHub CLI: `datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`. +- #8846 - Changed enum values in resource filters used by policies. `RESOURCE_TYPE` became `TYPE` and `RESOURCE_URN` became `URN`. +Any existing policies using these filters (i.e. defined for particular `urns` or `types` such as `dataset`) need to be upgraded +manually, for example by retrieving their respective `dataHubPolicyInfo` aspect and changing part using filter i.e. +```yaml + "resources": { + "filter": { + "criteria": [ + { + "field": "RESOURCE_TYPE", + "condition": "EQUALS", + "values": [ + "dataset" + ] + } + ] + } +``` +into +```yaml + "resources": { + "filter": { + "criteria": [ + { + "field": "TYPE", + "condition": "EQUALS", + "values": [ + "dataset" + ] + } + ] + } +``` +for example, using `datahub put` command. Policies can be also removed and re-created via UI. - #9077 - The BigQuery ingestion source by default sets `match_fully_qualified_names: true`. This means that any `dataset_pattern` or `schema_pattern` specified will be matched on the fully qualified dataset name, i.e. `.`. If this is not the case, please From ca331f58bd24187f9f0ca317216837178e9f41fa Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Wed, 25 Oct 2023 09:39:57 +0900 Subject: [PATCH 93/98] docs: add luckyorange script to head (#9080) --- docs-website/docusaurus.config.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 68ea1ebffa6c9..259ef970d818e 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -13,6 +13,13 @@ module.exports = { projectName: "datahub", // Usually your repo name. 
staticDirectories: ["static", "genStatic"], stylesheets: ["https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap"], + scripts: [ + { + src: "https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38", + async: true, + defer: true, + }, + ], noIndex: isSaas, customFields: { isSaas: isSaas, From 9a59c452bf36d750964f6d7f78df84a8c0c5eb66 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Wed, 25 Oct 2023 09:40:28 +0900 Subject: [PATCH 94/98] design: refactor docs navbar (#8975) Co-authored-by: Jeff Merrick --- docs-website/docusaurus.config.js | 61 +++++++++---------- docs-website/src/styles/global.scss | 27 +++++--- .../DocsVersionDropdownNavbarItem.js | 4 ++ .../src/theme/NavbarItem/styles.module.scss | 8 +++ 4 files changed, 59 insertions(+), 41 deletions(-) create mode 100644 docs-website/src/theme/NavbarItem/styles.module.scss diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 259ef970d818e..506e263933394 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -57,44 +57,41 @@ module.exports = { position: "right", }, { - to: "https://demo.datahubproject.io/", - label: "Demo", - position: "right", - }, - { - href: "https://blog.datahubproject.io/", - label: "Blog", - position: "right", - }, - { - href: "https://feature-requests.datahubproject.io/roadmap", - label: "Roadmap", + type: "dropdown", + label: "Resources", position: "right", + items: [ + { + href: "https://demo.datahubproject.io/", + label: "Demo", + }, + { + href: "https://blog.datahubproject.io/", + label: "Blog", + }, + { + href: "https://feature-requests.datahubproject.io/roadmap", + label: "Roadmap", + }, + { + href: "https://slack.datahubproject.io", + label: "Slack", + }, + { + href: "https://github.com/datahub-project/datahub", + label: "GitHub", + }, + { + href: "https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w", + label: "YouTube", + }, + ], }, { type: "docsVersionDropdown", - position: "right", + position: "left", dropdownActiveClassDisabled: true, }, - { - href: "https://slack.datahubproject.io", - "aria-label": "Slack", - position: "right", - className: "item__icon item__slack", - }, - { - href: "https://github.com/datahub-project/datahub", - "aria-label": "GitHub", - position: "right", - className: "item__icon item__github", - }, - - { - href: "https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w", - "aria-label": "YouTube", - position: "right", - className: "item__icon item__youtube", - }, ], }, footer: { diff --git a/docs-website/src/styles/global.scss b/docs-website/src/styles/global.scss index 55a54876b41ac..16e3893ed08b7 100644 --- a/docs-website/src/styles/global.scss +++ b/docs-website/src/styles/global.scss @@ -144,20 +144,29 @@ div[class^="announcementBar"] { /** Navbar */ -@media only screen and (max-width: 1050px) { - .navbar__toggle { - display: inherit; - } - .navbar__item { - display: none; - } -} - .navbar { .navbar__logo { height: 3rem; } + + .navbar__link { + align-items: center; + margin: 0 1rem 0; + padding: 0; + border-bottom: 2px solid transparent; + } + + .dropdown > .navbar__link:after { + top: -1px; + border-width: 0.3em 0.3em 0; + margin-left: 0.4em; + } + + .navbar__link--active { + border-bottom-color: var(--ifm-navbar-link-hover-color); + } .navbar__item { + padding: 0.25rem 0; svg[class*="iconExternalLink"] { display: none; } diff --git a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js 
b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js index cc04ab23d3cf3..661d64392e67f 100644 --- a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js +++ b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js @@ -6,6 +6,9 @@ import { translate } from "@docusaurus/Translate"; import { useLocation } from "@docusaurus/router"; import DefaultNavbarItem from "@theme/NavbarItem/DefaultNavbarItem"; import DropdownNavbarItem from "@theme/NavbarItem/DropdownNavbarItem"; + +import styles from "./styles.module.scss"; + const getVersionMainDoc = (version) => version.docs.find((doc) => doc.id === version.mainDocId); export default function DocsVersionDropdownNavbarItem({ mobile, @@ -60,6 +63,7 @@ export default function DocsVersionDropdownNavbarItem({ return ( Date: Tue, 24 Oct 2023 19:59:42 -0700 Subject: [PATCH 95/98] fix(ingest): update athena type mapping (#9061) --- .../src/datahub/ingestion/source/sql/athena.py | 4 +++- .../src/datahub/ingestion/source/sql/sql_common.py | 5 +---- .../src/datahub/ingestion/source/sql/sql_types.py | 5 ++--- .../datahub/utilities/sqlalchemy_type_converter.py | 13 ++++++++++--- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index 06b9ad92677a2..75e8fe1d6f7a6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -37,7 +37,7 @@ gen_database_key, ) from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField -from datahub.metadata.schema_classes import RecordTypeClass +from datahub.metadata.schema_classes import MapTypeClass, RecordTypeClass from datahub.utilities.hive_schema_to_avro import get_avro_schema_for_hive_column from datahub.utilities.sqlalchemy_type_converter import ( MapType, @@ -46,7 +46,9 @@ logger = logging.getLogger(__name__) +assert STRUCT, "required type modules are not available" register_custom_type(STRUCT, RecordTypeClass) +register_custom_type(MapType, MapTypeClass) class CustomAthenaRestDialect(AthenaRestDialect): diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index be03858ec3ef9..fad9b9e8018a5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -80,7 +80,6 @@ DatasetLineageTypeClass, DatasetPropertiesClass, GlobalTagsClass, - MapTypeClass, SubTypesClass, TagAssociationClass, UpstreamClass, @@ -90,7 +89,6 @@ from datahub.utilities.lossy_collections import LossyList from datahub.utilities.registries.domain_registry import DomainRegistry from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport -from datahub.utilities.sqlalchemy_type_converter import MapType if TYPE_CHECKING: from datahub.ingestion.source.ge_data_profiler import ( @@ -140,6 +138,7 @@ class SqlWorkUnit(MetadataWorkUnit): _field_type_mapping: Dict[Type[TypeEngine], Type] = { + # Note: to add dialect-specific types to this mapping, use the `register_custom_type` function. 
types.Integer: NumberTypeClass, types.Numeric: NumberTypeClass, types.Boolean: BooleanTypeClass, @@ -156,8 +155,6 @@ class SqlWorkUnit(MetadataWorkUnit): types.DATETIME: TimeTypeClass, types.TIMESTAMP: TimeTypeClass, types.JSON: RecordTypeClass, - # additional type definitions that are used by the Athena source - MapType: MapTypeClass, # type: ignore # Because the postgresql dialect is used internally by many other dialects, # we add some postgres types here. This is ok to do because the postgresql # dialect is built-in to sqlalchemy. diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py index ae47623188f42..3b4a7e1dc0287 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py @@ -7,7 +7,7 @@ BytesType, DateType, EnumType, - MapType as MapTypeAvro, + MapType, NullType, NumberType, RecordType, @@ -15,7 +15,6 @@ TimeType, UnionType, ) -from datahub.utilities.sqlalchemy_type_converter import MapType # these can be obtained by running `select format_type(oid, null),* from pg_type;` # we've omitted the types without a meaningful DataHub type (e.g. postgres-specific types, index vectors, etc.) @@ -364,7 +363,7 @@ def resolve_vertica_modified_type(type_string: str) -> Any: "time": TimeType, "timestamp": TimeType, "row": RecordType, - "map": MapTypeAvro, + "map": MapType, "array": ArrayType, } diff --git a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py index 1d5ec5dae3519..5d2fc6872c7bd 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py +++ b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py @@ -4,7 +4,6 @@ from typing import Any, Dict, List, Optional, Type, Union from sqlalchemy import types -from sqlalchemy_bigquery import STRUCT from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField @@ -12,6 +11,12 @@ logger = logging.getLogger(__name__) +try: + # This is used for both BigQuery and Athena. 
+ from sqlalchemy_bigquery import STRUCT +except ImportError: + STRUCT = None + class MapType(types.TupleType): # Wrapper class around SQLalchemy's TupleType to increase compatibility with DataHub @@ -42,7 +47,9 @@ def get_avro_type( ) -> Dict[str, Any]: """Determines the concrete AVRO schema type for a SQLalchemy-typed column""" - if type(column_type) in cls.PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE.keys(): + if isinstance( + column_type, tuple(cls.PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE.keys()) + ): return { "type": cls.PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE[type(column_type)], "native_data_type": str(column_type), @@ -88,7 +95,7 @@ def get_avro_type( "key_type": cls.get_avro_type(column_type=key_type, nullable=nullable), "key_native_data_type": str(key_type), } - if isinstance(column_type, STRUCT): + if STRUCT and isinstance(column_type, STRUCT): fields = [] for field_def in column_type._STRUCT_fields: field_name, field_type = field_def From 2d1584b12fe4a40a077457e618f0937132763586 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 24 Oct 2023 23:08:24 -0400 Subject: [PATCH 96/98] feat(ingest/datahub-source): Allow ingesting aspects from the entitiesV2 API (#9089) --- .../ingestion/source/datahub/config.py | 19 ++++++- .../source/datahub/datahub_api_reader.py | 49 +++++++++++++++++++ .../source/datahub/datahub_source.py | 16 ++++++ 3 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py index 053d136305527..83958dc76754f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py @@ -1,3 +1,4 @@ +import os from typing import Optional from pydantic import Field, root_validator @@ -67,9 +68,25 @@ class DataHubSourceConfig(StatefulIngestionConfigBase): ), ) + pull_from_datahub_api: bool = Field( + default=False, + description="Use the DataHub API to fetch versioned aspects.", + hidden_from_docs=True, + ) + + max_workers: int = Field( + default=5 * (os.cpu_count() or 4), + description="Number of worker threads to use for datahub api ingestion.", + hidden_from_docs=True, + ) + @root_validator def check_ingesting_data(cls, values): - if not values.get("database_connection") and not values.get("kafka_connection"): + if ( + not values.get("database_connection") + and not values.get("kafka_connection") + and not values.get("pull_from_datahub_api") + ): raise ValueError( "Your current config will not ingest any data." " Please specify at least one of `database_connection` or `kafka_connection`, ideally both." 
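
The new `datahub_api_reader.py` module added below fans aspect fetches out over a thread pool: it submits one task per URN and yields results as tasks finish. The following is a rough, self-contained sketch of that pattern only — `fetch_all` and `fake_fetch` are made-up stand-in names, not part of the patch — using nothing but the standard library:

```python
from concurrent import futures
from typing import Callable, Iterable, List


def fetch_all(
    urns: List[str],
    fetch_one: Callable[[str], List[str]],
    max_workers: int = 20,
) -> Iterable[str]:
    """Submit one blocking fetch per URN and yield results as tasks complete."""
    with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        tasks = [executor.submit(fetch_one, urn) for urn in urns]
        for task in futures.as_completed(tasks):
            yield from task.result()


if __name__ == "__main__":
    # Hypothetical fetch function standing in for the reader's per-URN lookup.
    def fake_fetch(urn: str) -> List[str]:
        return [f"{urn}:status", f"{urn}:datasetProperties"]

    for item in fetch_all(["urn:li:dataset:a", "urn:li:dataset:b"], fake_fetch):
        print(item)
```

The real reader plugs its per-URN aspect lookup and the configured `max_workers` into this same shape, as the diff below shows.
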
diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py new file mode 100644 index 0000000000000..7ee36736723b2 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py @@ -0,0 +1,49 @@ +import logging +from concurrent import futures +from typing import Dict, Iterable, List + +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.graph.filters import RemovedStatusFilter +from datahub.ingestion.source.datahub.config import DataHubSourceConfig +from datahub.ingestion.source.datahub.report import DataHubSourceReport +from datahub.metadata._schema_classes import _Aspect + +logger = logging.getLogger(__name__) + +# Should work for at least mysql, mariadb, postgres +DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f" + + +class DataHubApiReader: + def __init__( + self, + config: DataHubSourceConfig, + report: DataHubSourceReport, + graph: DataHubGraph, + ): + self.config = config + self.report = report + self.graph = graph + + def get_aspects(self) -> Iterable[MetadataChangeProposalWrapper]: + urns = self.graph.get_urns_by_filter( + status=RemovedStatusFilter.ALL, + batch_size=self.config.database_query_batch_size, + ) + tasks: List[futures.Future[Iterable[MetadataChangeProposalWrapper]]] = [] + with futures.ThreadPoolExecutor( + max_workers=self.config.max_workers + ) as executor: + for urn in urns: + tasks.append(executor.submit(self._get_aspects_for_urn, urn)) + for task in futures.as_completed(tasks): + yield from task.result() + + def _get_aspects_for_urn(self, urn: str) -> Iterable[MetadataChangeProposalWrapper]: + aspects: Dict[str, _Aspect] = self.graph.get_entity_semityped(urn) # type: ignore + for aspect in aspects.values(): + yield MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=aspect, + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py index 2368febe1ff57..a2f43b8cc62cb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py @@ -15,6 +15,7 @@ from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.datahub.config import DataHubSourceConfig +from datahub.ingestion.source.datahub.datahub_api_reader import DataHubApiReader from datahub.ingestion.source.datahub.datahub_database_reader import ( DataHubDatabaseReader, ) @@ -58,6 +59,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: logger.info(f"Ingesting DataHub metadata up until {self.report.stop_time}") state = self.stateful_ingestion_handler.get_last_run_state() + if self.config.pull_from_datahub_api: + yield from self._get_api_workunits() + if self.config.database_connection is not None: yield from self._get_database_workunits( from_createdon=state.database_createdon_datetime @@ -139,6 +143,18 @@ def _get_kafka_workunits( ) self._commit_progress(i) + def _get_api_workunits(self) -> Iterable[MetadataWorkUnit]: + if self.ctx.graph is None: + self.report.report_failure( + "datahub_api", + "Specify datahub_api on your ingestion recipe to ingest from the DataHub API", + ) + return + + reader = DataHubApiReader(self.config, self.report, self.ctx.graph) + for 
mcp in reader.get_aspects():
+            yield mcp.as_workunit()
+
     def _commit_progress(self, i: Optional[int] = None) -> None:
         """Commit progress to stateful storage, if there have been no errors.

From b612545220d9329696eaa26d6b42439cdf01fb95 Mon Sep 17 00:00:00 2001
From: siddiquebagwan-gslab
Date: Wed, 25 Oct 2023 15:26:06 +0530
Subject: [PATCH 97/98] feat(ingestion/redshift): support auto_incremental_lineage (#9010)

---
 docs/how/updating-datahub.md | 2 ++
 metadata-ingestion/setup.py | 10 +++-------
 .../datahub/ingestion/source/redshift/config.py | 8 +++++++-
 .../datahub/ingestion/source/redshift/redshift.py | 15 +++++++++++++--
 .../tests/unit/test_redshift_config.py | 6 ++++++
 5 files changed, 31 insertions(+), 10 deletions(-)
 create mode 100644 metadata-ingestion/tests/unit/test_redshift_config.py

diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md
index 57193ea69f2be..8813afee65be9 100644
--- a/docs/how/updating-datahub.md
+++ b/docs/how/updating-datahub.md
@@ -4,6 +4,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
 ## Next
+- #9010 - In the Redshift source, the `incremental_lineage` config option is now disabled by default.
+
 ### Breaking Changes
 - #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now.

diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 417588a433655..72b0e776a0da5 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -355,13 +355,9 @@
     | {"psycopg2-binary", "pymysql>=1.0.2"},
     "pulsar": {"requests"},
     "redash": {"redash-toolbelt", "sql-metadata"} | sqllineage_lib,
-    "redshift": sql_common
-    | redshift_common
-    | usage_common
-    | sqlglot_lib
-    | {"redshift-connector"},
-    "redshift-legacy": sql_common | redshift_common,
-    "redshift-usage-legacy": sql_common | usage_common | redshift_common,
+    "redshift": sql_common | redshift_common | usage_common | {"redshift-connector"} | sqlglot_lib,
+    "redshift-legacy": sql_common | redshift_common | sqlglot_lib,
+    "redshift-usage-legacy": sql_common | redshift_common | sqlglot_lib | usage_common,
     "s3": {*s3_base, *data_lake_profiling},
     "gcs": {*s3_base, *data_lake_profiling},
     "sagemaker": aws_common,

diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py
index 2789b800940db..79b044841e054 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py
@@ -133,7 +133,13 @@ class RedshiftConfig(
     )
     extract_column_level_lineage: bool = Field(
-        default=True, description="Whether to extract column level lineage."
+        default=True,
+        description="Whether to extract column level lineage. This config works with rest-sink only.",
+    )
+
+    incremental_lineage: bool = Field(
+        default=False,
+        description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.
This config works with rest-sink only.", ) @root_validator(pre=True) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index a1b6333a3775d..26237a6ce12e0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -1,5 +1,6 @@ import logging from collections import defaultdict +from functools import partial from typing import Dict, Iterable, List, Optional, Type, Union import humanfriendly @@ -25,6 +26,7 @@ platform_name, support_status, ) +from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage from datahub.ingestion.api.source import ( CapabilityReport, MetadataWorkUnitProcessor, @@ -369,6 +371,11 @@ def gen_database_container(self, database: str) -> Iterable[MetadataWorkUnit]: def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), + partial( + auto_incremental_lineage, + self.ctx.graph, + self.config.incremental_lineage, + ), StaleEntityRemovalHandler.create( self, self.config, self.ctx ).workunit_processor, @@ -942,7 +949,9 @@ def generate_lineage(self, database: str) -> Iterable[MetadataWorkUnit]: ) if lineage_info: yield from gen_lineage( - dataset_urn, lineage_info, self.config.incremental_lineage + dataset_urn, + lineage_info, + incremental_lineage=False, # incremental lineage generation is taken care by auto_incremental_lineage ) for schema in self.db_views[database]: @@ -956,7 +965,9 @@ def generate_lineage(self, database: str) -> Iterable[MetadataWorkUnit]: ) if lineage_info: yield from gen_lineage( - dataset_urn, lineage_info, self.config.incremental_lineage + dataset_urn, + lineage_info, + incremental_lineage=False, # incremental lineage generation is taken care by auto_incremental_lineage ) def add_config_to_report(self): diff --git a/metadata-ingestion/tests/unit/test_redshift_config.py b/metadata-ingestion/tests/unit/test_redshift_config.py new file mode 100644 index 0000000000000..8a165e7f5f3fe --- /dev/null +++ b/metadata-ingestion/tests/unit/test_redshift_config.py @@ -0,0 +1,6 @@ +from datahub.ingestion.source.redshift.config import RedshiftConfig + + +def test_incremental_lineage_default_to_false(): + config = RedshiftConfig(host_port="localhost:5439", database="test") + assert config.incremental_lineage is False From 9cccd22c04bf357b574f4d9d7dae3aee633bf7d3 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Wed, 25 Oct 2023 11:01:49 +0100 Subject: [PATCH 98/98] feat(auth): Add backwards compatible field resolver (#9096) --- .../com/datahub/authorization/EntityFieldType.java | 13 +++++++++++++ .../authorization/DefaultEntitySpecResolver.java | 13 +++++++------ .../DataPlatformInstanceFieldResolverProvider.java | 10 +++++----- .../DomainFieldResolverProvider.java | 5 +++-- .../EntityFieldResolverProvider.java | 6 ++++-- .../EntityTypeFieldResolverProvider.java | 7 +++++-- .../EntityUrnFieldResolverProvider.java | 7 +++++-- .../GroupMembershipFieldResolverProvider.java | 5 +++-- .../OwnerFieldResolverProvider.java | 5 +++-- ...taPlatformInstanceFieldResolverProviderTest.java | 2 +- .../GroupMembershipFieldResolverProviderTest.java | 2 +- 11 files changed, 50 insertions(+), 25 deletions(-) diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java index 
46763f29a7040..1258d958f2092 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java @@ -4,6 +4,19 @@ * List of entity field types to fetch for a given entity */ public enum EntityFieldType { + + /** + * Type of the entity (e.g. dataset, chart) + * @deprecated + */ + @Deprecated + RESOURCE_URN, + /** + * Urn of the entity + * @deprecated + */ + @Deprecated + RESOURCE_TYPE, /** * Type of the entity (e.g. dataset, chart) */ diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java index 4ad14ed59c9c0..65b0329a9c4f2 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java @@ -1,15 +1,16 @@ package com.datahub.authorization; -import com.datahub.authorization.fieldresolverprovider.DataPlatformInstanceFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; import com.datahub.authentication.Authentication; +import com.datahub.authorization.fieldresolverprovider.DataPlatformInstanceFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.DomainFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.EntityFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.GroupMembershipFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; import com.google.common.collect.ImmutableList; import com.linkedin.entity.client.EntityClient; +import com.linkedin.util.Pair; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -34,7 +35,7 @@ public ResolvedEntitySpec resolve(EntitySpec entitySpec) { private Map getFieldResolvers(EntitySpec entitySpec) { return _entityFieldResolverProviders.stream() - .collect(Collectors.toMap(EntityFieldResolverProvider::getFieldType, - hydrator -> hydrator.getFieldResolver(entitySpec))); + .flatMap(resolver -> resolver.getFieldTypes().stream().map(fieldType -> Pair.of(fieldType, resolver))) + .collect(Collectors.toMap(Pair::getKey, pair -> pair.getValue().getFieldResolver(entitySpec))); } } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java index 27cb8fcee8138..cbb237654e969 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java @@ -1,8 +1,5 @@ package com.datahub.authorization.fieldresolverprovider; -import static 
com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; - import com.datahub.authentication.Authentication; import com.datahub.authorization.EntityFieldType; import com.datahub.authorization.EntitySpec; @@ -14,10 +11,13 @@ import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.client.EntityClient; import java.util.Collections; +import java.util.List; import java.util.Objects; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import static com.linkedin.metadata.Constants.*; + /** * Provides field resolver for domain given resourceSpec */ @@ -29,8 +29,8 @@ public class DataPlatformInstanceFieldResolverProvider implements EntityFieldRes private final Authentication _systemAuthentication; @Override - public EntityFieldType getFieldType() { - return EntityFieldType.DATA_PLATFORM_INSTANCE; + public List getFieldTypes() { + return Collections.singletonList(EntityFieldType.DATA_PLATFORM_INSTANCE); } @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java index 25c2165f02b94..15d821b75c0bd 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java @@ -14,6 +14,7 @@ import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -37,8 +38,8 @@ public class DomainFieldResolverProvider implements EntityFieldResolverProvider private final Authentication _systemAuthentication; @Override - public EntityFieldType getFieldType() { - return EntityFieldType.DOMAIN; + public List getFieldTypes() { + return Collections.singletonList(EntityFieldType.DOMAIN); } @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java index a76db0ecb5102..227d403a9cd1d 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java @@ -3,6 +3,7 @@ import com.datahub.authorization.FieldResolver; import com.datahub.authorization.EntityFieldType; import com.datahub.authorization.EntitySpec; +import java.util.List; /** @@ -11,9 +12,10 @@ public interface EntityFieldResolverProvider { /** - * Field that this hydrator is hydrating + * List of fields that this hydrator is hydrating. 
+ * @return */ - EntityFieldType getFieldType(); + List getFieldTypes(); /** * Return resolver for fetching the field values given the entity diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java index 187f696904947..addac84c68b18 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java @@ -3,16 +3,19 @@ import com.datahub.authorization.FieldResolver; import com.datahub.authorization.EntityFieldType; import com.datahub.authorization.EntitySpec; +import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableList; import java.util.Collections; +import java.util.List; /** * Provides field resolver for entity type given entitySpec */ public class EntityTypeFieldResolverProvider implements EntityFieldResolverProvider { + @Override - public EntityFieldType getFieldType() { - return EntityFieldType.TYPE; + public List getFieldTypes() { + return ImmutableList.of(EntityFieldType.TYPE, EntityFieldType.RESOURCE_TYPE); } @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java index 2f5c4a7c6c961..32960de687839 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java @@ -3,16 +3,19 @@ import com.datahub.authorization.FieldResolver; import com.datahub.authorization.EntityFieldType; import com.datahub.authorization.EntitySpec; +import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableList; import java.util.Collections; +import java.util.List; /** * Provides field resolver for entity urn given entitySpec */ public class EntityUrnFieldResolverProvider implements EntityFieldResolverProvider { + @Override - public EntityFieldType getFieldType() { - return EntityFieldType.URN; + public List getFieldTypes() { + return ImmutableList.of(EntityFieldType.URN, EntityFieldType.RESOURCE_URN); } @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java index 8db029632d7e2..b1202d9f4bbd3 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java @@ -13,6 +13,7 @@ import com.linkedin.identity.NativeGroupMembership; import com.linkedin.metadata.Constants; import com.linkedin.identity.GroupMembership; +import java.util.Collections; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -35,8 +36,8 @@ public class GroupMembershipFieldResolverProvider implements EntityFieldResolver private final 
Authentication _systemAuthentication; @Override - public EntityFieldType getFieldType() { - return EntityFieldType.GROUP_MEMBERSHIP; + public List getFieldTypes() { + return Collections.singletonList(EntityFieldType.GROUP_MEMBERSHIP); } @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java index bdd652d1d3871..3c27f9e6ce8d7 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java @@ -12,6 +12,7 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import java.util.Collections; +import java.util.List; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -28,8 +29,8 @@ public class OwnerFieldResolverProvider implements EntityFieldResolverProvider { private final Authentication _systemAuthentication; @Override - public EntityFieldType getFieldType() { - return EntityFieldType.OWNER; + public List getFieldTypes() { + return Collections.singletonList(EntityFieldType.OWNER); } @Override diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java index b2343bbb01509..5c7d87f1c05a9 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java @@ -56,7 +56,7 @@ public void setup() { @Test public void shouldReturnDataPlatformInstanceType() { - assertEquals(EntityFieldType.DATA_PLATFORM_INSTANCE, dataPlatformInstanceFieldResolverProvider.getFieldType()); + assertEquals(EntityFieldType.DATA_PLATFORM_INSTANCE, dataPlatformInstanceFieldResolverProvider.getFieldTypes().get(0)); } @Test diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java index 54675045b4413..af547f14cd3fc 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java @@ -53,7 +53,7 @@ public void setup() { @Test public void shouldReturnGroupsMembershipType() { - assertEquals(EntityFieldType.GROUP_MEMBERSHIP, groupMembershipFieldResolverProvider.getFieldType()); + assertEquals(EntityFieldType.GROUP_MEMBERSHIP, groupMembershipFieldResolverProvider.getFieldTypes().get(0)); } @Test
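
Taken together, the resolver change in this last patch means each field-resolver provider now advertises a list of field types (including the deprecated `RESOURCE_TYPE`/`RESOURCE_URN` aliases), and `DefaultEntitySpecResolver` flattens those lists when building its field-type-to-resolver map. A loose, Python-flavoured sketch of that mapping step — illustration only, with invented names; the actual implementation is the Java code above:

```python
from typing import Callable, Dict, List, Tuple

# Hypothetical stand-ins for the Java provider objects in the patch:
# each provider is (claimed field types, function producing a resolver for an entity).
Provider = Tuple[List[str], Callable[[str], str]]


def build_field_resolvers(providers: List[Provider], entity: str) -> Dict[str, str]:
    """Flatten (field_type, provider) pairs so one provider can serve several field types."""
    return {
        field_type: get_resolver(entity)
        for field_types, get_resolver in providers
        for field_type in field_types
    }


if __name__ == "__main__":
    providers: List[Provider] = [
        (["TYPE", "RESOURCE_TYPE"], lambda e: f"type-resolver({e})"),
        (["URN", "RESOURCE_URN"], lambda e: f"urn-resolver({e})"),
    ]
    print(build_field_resolvers(providers, "urn:li:dataset:x"))
```

Registering the old and new enum values against the same resolver is what keeps policies written before the rename working without modification.
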