diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java index 41a1d22485ea4..76abddc9a99a9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java @@ -49,6 +49,8 @@ public CompletableFuture get(DataFetchingEnvironment environmen final int start = input.getStart() != null ? input.getStart() : DEFAULT_START; final int count = input.getCount() != null ? input.getCount() : DEFAULT_COUNT; final String query = input.getQuery() != null ? input.getQuery() : "*"; + // escape forward slash since it is a reserved character in Elasticsearch + final String sanitizedQuery = ResolverUtils.escapeForwardSlash(query); return CompletableFuture.supplyAsync(() -> { try { @@ -64,7 +66,7 @@ public CompletableFuture get(DataFetchingEnvironment environmen maybeResolvedView != null ? SearchUtils.combineFilters(filter, maybeResolvedView.getDefinition().getFilter()) : filter, - query, + sanitizedQuery, start, count, context.getAuthentication() diff --git a/datahub-web-react/src/app/home/AcrylDemoBanner.tsx b/datahub-web-react/src/app/home/AcrylDemoBanner.tsx index 87efae03e0a7c..0a6316a71db16 100644 --- a/datahub-web-react/src/app/home/AcrylDemoBanner.tsx +++ b/datahub-web-react/src/app/home/AcrylDemoBanner.tsx @@ -33,13 +33,17 @@ const StyledLink = styled(Link)` font-weight: 700; `; +const TextContent = styled.div` + max-width: 1025px; +`; + export default function AcrylDemoBanner() { return ( - Schedule a Demo of Managed Datahub - + Schedule a Demo of Managed DataHub + DataHub is already the industry's #1 Open Source Data Catalog.{' '} Schedule a demo {' '} - of Acryl Cloud to see the advanced features that take it to the next level! - + of Acryl DataHub to see the advanced features that take it to the next level or purchase Acryl Cloud + on{' '} + + AWS Marketplace + + ! + ); diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 03b3d763ed247..b705c973cdbb5 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -20,6 +20,8 @@ individually enable / disable desired field metrics. ### Deprecations +- #8198: In the Python SDK, the `PlatformKey` class has been renamed to `ContainerKey`. + ### Other notable Changes ## 0.10.4 diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index 9c44949741297..47727d5784a19 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -6,7 +6,17 @@ import time from enum import Enum from hashlib import md5 -from typing import Any, List, Optional, Type, TypeVar, Union, cast, get_type_hints +from typing import ( + TYPE_CHECKING, + Any, + List, + Optional, + Type, + TypeVar, + Union, + cast, + get_type_hints, +) import typing_inspect @@ -50,6 +60,9 @@ os.getenv("DATAHUB_DATASET_URN_TO_LOWER", "false") == "true" ) +if TYPE_CHECKING: + from datahub.emitter.mcp_builder import DatahubKey + # TODO: Delete this once lower-casing is the standard. def set_dataset_urn_to_lower(value: bool) -> None: @@ -132,7 +145,11 @@ def dataset_key_to_urn(key: DatasetKeyClass) -> str: ) -def make_container_urn(guid: str) -> str: +def make_container_urn(guid: Union[str, "DatahubKey"]) -> str: + from datahub.emitter.mcp_builder import DatahubKey + + if isinstance(guid, DatahubKey): + guid = guid.guid() return f"urn:li:container:{guid}" diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 9051f2e82fa1f..40df214f49433 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -54,7 +54,9 @@ def guid(self) -> str: return _stable_guid_from_dict(bag) -class PlatformKey(DatahubKey): +class ContainerKey(DatahubKey): + """Base class for container guid keys. Most users should use one of the subclasses instead.""" + platform: str instance: Optional[str] = None @@ -81,8 +83,15 @@ def guid_dict(self) -> Dict[str, str]: def property_dict(self) -> Dict[str, str]: return self.dict(by_alias=True, exclude_none=True) + def as_urn(self) -> str: + return make_container_urn(guid=self.guid()) + + +# DEPRECATION: Keeping the `PlatformKey` name around for backwards compatibility. +PlatformKey = ContainerKey + -class DatabaseKey(PlatformKey): +class DatabaseKey(ContainerKey): database: str @@ -90,11 +99,11 @@ class SchemaKey(DatabaseKey): db_schema: str = Field(alias="schema") -class ProjectIdKey(PlatformKey): +class ProjectIdKey(ContainerKey): project_id: str -class MetastoreKey(PlatformKey): +class MetastoreKey(ContainerKey): metastore: str @@ -110,11 +119,11 @@ class BigQueryDatasetKey(ProjectIdKey): dataset_id: str -class FolderKey(PlatformKey): +class FolderKey(ContainerKey): folder_abs_path: str -class BucketKey(PlatformKey): +class BucketKey(ContainerKey): bucket_name: str @@ -127,7 +136,7 @@ def default(self, obj: Any) -> Any: return json.JSONEncoder.default(self, obj) -KeyType = TypeVar("KeyType", bound=PlatformKey) +KeyType = TypeVar("KeyType", bound=ContainerKey) def add_domain_to_entity_wu( @@ -188,7 +197,7 @@ def gen_containers( container_key: KeyType, name: str, sub_types: List[str], - parent_container_key: Optional[PlatformKey] = None, + parent_container_key: Optional[ContainerKey] = None, extra_properties: Optional[Dict[str, str]] = None, domain_urn: Optional[str] = None, description: Optional[str] = None, @@ -199,9 +208,7 @@ def gen_containers( created: Optional[int] = None, last_modified: Optional[int] = None, ) -> Iterable[MetadataWorkUnit]: - container_urn = make_container_urn( - guid=container_key.guid(), - ) + container_urn = container_key.as_urn() yield MetadataChangeProposalWrapper( entityUrn=f"{container_urn}", # entityKeyAspect=ContainerKeyClass(guid=parent_container_key.guid()), diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 2f817ee69a637..cac53c350f2ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -88,6 +88,12 @@ class RemovedStatusFilter(enum.Enum): """Search only soft-deleted entities.""" +@dataclass +class RelatedEntity: + urn: str + relationship_type: str + + def _graphql_entity_type(entity_type: str) -> str: """Convert the entity types into GraphQL "EntityType" enum values.""" @@ -769,11 +775,6 @@ class RelationshipDirection(str, enum.Enum): INCOMING = "INCOMING" OUTGOING = "OUTGOING" - @dataclass - class RelatedEntity: - urn: str - relationship_type: str - def get_related_entities( self, entity_urn: str, @@ -794,7 +795,7 @@ def get_related_entities( }, ) for related_entity in response.get("entities", []): - yield DataHubGraph.RelatedEntity( + yield RelatedEntity( urn=related_entity["urn"], relationship_type=related_entity["relationshipType"], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 919c803222066..ccda00ba293ef 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -20,7 +20,7 @@ set_dataset_urn_to_lower, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.emitter.mcp_builder import BigQueryDatasetKey, PlatformKey, ProjectIdKey +from datahub.emitter.mcp_builder import BigQueryDatasetKey, ContainerKey, ProjectIdKey from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, @@ -434,7 +434,7 @@ def get_dataplatform_instance_aspect( entityUrn=dataset_urn, aspect=aspect ).as_workunit() - def gen_dataset_key(self, db_name: str, schema: str) -> PlatformKey: + def gen_dataset_key(self, db_name: str, schema: str) -> ContainerKey: return BigQueryDatasetKey( project_id=db_name, dataset_id=schema, @@ -443,7 +443,7 @@ def gen_dataset_key(self, db_name: str, schema: str) -> PlatformKey: backcompat_env_as_instance=True, ) - def gen_project_id_key(self, database: str) -> PlatformKey: + def gen_project_id_key(self, database: str) -> ContainerKey: return ProjectIdKey( project_id=database, platform=self.platform, diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py index 0a65537772390..b04718a9eabba 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py @@ -3,9 +3,9 @@ from datahub.emitter.mcp_builder import ( BucketKey, + ContainerKey, FolderKey, KeyType, - PlatformKey, add_dataset_to_container, gen_containers, ) @@ -45,7 +45,7 @@ def create_emit_containers( container_key: KeyType, name: str, sub_types: List[str], - parent_container_key: Optional[PlatformKey] = None, + parent_container_key: Optional[ContainerKey] = None, domain_urn: Optional[str] = None, ) -> Iterable[MetadataWorkUnit]: if container_key.guid() not in self.processed_containers: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 33596091e420d..919cb83e4d832 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -9,7 +9,7 @@ import datahub.emitter.mce_builder as builder import datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes as powerbi_data_classes from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.emitter.mcp_builder import PlatformKey, gen_containers +from datahub.emitter.mcp_builder import ContainerKey, gen_containers from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SourceCapability, @@ -104,7 +104,7 @@ def __init__( self.__reporter = reporter self.__dataplatform_instance_resolver = dataplatform_instance_resolver self.processed_datasets: Set[powerbi_data_classes.PowerBIDataset] = set() - self.workspace_key: PlatformKey + self.workspace_key: ContainerKey @staticmethod def urn_to_lowercase(value: str, flag: bool) -> str: @@ -256,7 +256,6 @@ def to_datahub_schema( self, table: powerbi_data_classes.Table, ) -> SchemaMetadataClass: - fields = [] table_fields = ( [self.to_datahub_schema_field(column) for column in table.columns] diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py index 28a5fac8b127b..2d2d9f527788f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py @@ -2,7 +2,7 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union -from datahub.emitter.mcp_builder import PlatformKey +from datahub.emitter.mcp_builder import ContainerKey from datahub.metadata.schema_classes import ( BooleanTypeClass, DateTypeClass, @@ -28,11 +28,11 @@ } -class WorkspaceKey(PlatformKey): +class WorkspaceKey(ContainerKey): workspace: str -class DatasetKey(PlatformKey): +class DatasetKey(ContainerKey): dataset: str @@ -57,7 +57,7 @@ def get_workspace_key( platform_name: str, platform_instance: Optional[str] = None, workspace_id_as_urn_part: Optional[bool] = False, - ) -> PlatformKey: + ) -> ContainerKey: return WorkspaceKey( workspace=self.get_urn_part(workspace_id_as_urn_part), platform=platform_name, @@ -150,7 +150,7 @@ def __eq__(self, instance): def __hash__(self): return hash(self.__members()) - def get_dataset_key(self, platform_name: str) -> PlatformKey: + def get_dataset_key(self, platform_name: str) -> ContainerKey: return DatasetKey( dataset=self.id, platform=platform_name, diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py index 92560a11b90eb..9f409793272dd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py @@ -82,8 +82,6 @@ class Constant: Constant.LINKED_REPORTS: "{PBIRS_BASE_URL}/LinkedReports", Constant.LINKED_REPORT: "{PBIRS_BASE_URL}/LinkedReports({LINKED_REPORT_ID})", Constant.ME: "{PBIRS_BASE_URLL}/Me", - Constant.MOBILE_REPORTS: "{PBIRS_BASE_URL}/MobileReports", - Constant.MOBILE_REPORT: "{PBIRS_BASE_URL}/MobileReports({MOBILE_REPORT_ID})", Constant.POWERBI_REPORTS: "{PBIRS_BASE_URL}/PowerBiReports", Constant.POWERBI_REPORT: "{PBIRS_BASE_URL}/PowerBiReports({POWERBI_REPORT_ID})", Constant.POWERBI_REPORT_DATASOURCES: "{PBIRS_BASE_URL}/PowerBiReports({ID})/DataSources", diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py index 7419241511eaf..e66119f6e8d76 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py @@ -35,7 +35,6 @@ from datahub.ingestion.source.powerbi_report_server.report_server_domain import ( CorpUser, LinkedReport, - MobileReport, Owner, OwnershipData, PowerBiReport, @@ -116,6 +115,29 @@ class PowerBiReportServerDashboardSourceConfig(PowerBiReportServerAPIConfig): chart_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() +def log_http_error(e: BaseException, message: str) -> Any: + LOGGER.warning(message) + + if isinstance(e, requests.exceptions.HTTPError): + LOGGER.warning(f"HTTP status-code = {e.response.status_code}") + + LOGGER.debug(msg=message, exc_info=e) + + return e + + +def get_response_dict(response: requests.Response, error_message: str) -> dict: + + result_dict: dict = {} + try: + response.raise_for_status() + result_dict = response.json() + except BaseException as e: + log_http_error(e=e, message=error_message) + + return result_dict + + class PowerBiReportServerAPI: # API endpoints of PowerBI Report Server to fetch reports, datasets @@ -144,14 +166,15 @@ def requests_get(self, url_http: str, url_https: str, content_type: str) -> Any: url=url_http, auth=self.get_auth_credentials, ) - # Check if we got response from PowerBi Report Server - if response.status_code != 200: - message: str = "Failed to fetch Report from powerbi-report-server for" - LOGGER.warning(message) - LOGGER.warning("{}={}".format(Constant.ReportId, content_type)) - raise ValueError(message) - return response.json() + error_message: str = ( + f"Failed to fetch {content_type} Report from powerbi-report-server" + ) + + return get_response_dict( + response=response, + error_message=error_message, + ) def get_all_reports(self) -> List[Any]: """ @@ -159,7 +182,6 @@ def get_all_reports(self) -> List[Any]: """ report_types_mapping: Dict[str, Any] = { Constant.REPORTS: Report, - Constant.MOBILE_REPORTS: MobileReport, Constant.LINKED_REPORTS: LinkedReport, Constant.POWERBI_REPORTS: PowerBiReport, } @@ -174,15 +196,17 @@ def get_all_reports(self) -> List[Any]: report_get_endpoint_https = report_get_endpoint.format( PBIRS_BASE_URL=self.__config.get_base_api_https_url, ) + response_dict = self.requests_get( url_http=report_get_endpoint_http, url_https=report_get_endpoint_https, content_type=report_type, - )["value"] - if response_dict: + ) + + if response_dict.get("value"): reports.extend( report_types_mapping[report_type].parse_obj(report) - for report in response_dict + for report in response_dict.get("value") ) return reports @@ -487,7 +511,6 @@ class PowerBiReportServerDashboardSource(Source): Next types of report can be ingested: - PowerBI report(.pbix) - Paginated report(.rdl) - - Mobile report - Linked report """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py index adcbcaaed96e6..60426fc5bd660 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py @@ -239,11 +239,6 @@ class Manifest(BaseModel): resources: List[Dict[str, List]] = Field(alias="Resources") -class MobileReport(CatalogItem): - allow_caching: bool = Field(alias="AllowCaching") - manifest: Manifest = Field(alias="Manifest") - - class PowerBIReport(CatalogItem): has_data_sources: bool = Field(alias="HasDataSources") diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index 46f9fd240db04..8b2eed36ac6b3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -9,7 +9,7 @@ from sqlalchemy.engine.reflection import Inspector from datahub.configuration.validate_field_rename import pydantic_renamed_field -from datahub.emitter.mcp_builder import PlatformKey +from datahub.emitter.mcp_builder import ContainerKey from datahub.ingestion.api.decorators import ( SourceCapability, SupportStatus, @@ -211,7 +211,7 @@ def gen_schema_containers( extra_properties=extra_properties, ) - def get_database_container_key(self, db_name: str, schema: str) -> PlatformKey: + def get_database_container_key(self, db_name: str, schema: str) -> ContainerKey: # Because our overridden get_allowed_schemas method returns db_name as the schema name, # the db_name and schema here will be the same. Hence, we just ignore the schema parameter. # Based on community feedback, db_name only available if it is explicitly specified in the connection string. @@ -232,7 +232,7 @@ def add_table_to_schema_container( dataset_urn: str, db_name: str, schema: str, - schema_container_key: Optional[PlatformKey] = None, + schema_container_key: Optional[ContainerKey] = None, ) -> Iterable[MetadataWorkUnit]: yield from add_table_to_schema_container( dataset_urn=dataset_urn, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py index 7554dd5af3103..a5f5034d175c6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py @@ -8,8 +8,8 @@ ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( + ContainerKey, DatabaseKey, - PlatformKey, SchemaKey, add_dataset_to_container, add_domain_to_entity_wu, @@ -28,7 +28,7 @@ def gen_schema_key( platform: str, platform_instance: Optional[str], env: Optional[str], -) -> PlatformKey: +) -> ContainerKey: return SchemaKey( database=db_name, schema=schema, @@ -41,7 +41,7 @@ def gen_schema_key( def gen_database_key( database: str, platform: str, platform_instance: Optional[str], env: Optional[str] -) -> PlatformKey: +) -> ContainerKey: return DatabaseKey( database=database, platform=platform, @@ -55,8 +55,8 @@ def gen_schema_container( schema: str, database: str, sub_types: List[str], - database_container_key: PlatformKey, - schema_container_key: PlatformKey, + database_container_key: ContainerKey, + schema_container_key: ContainerKey, domain_registry: Optional[DomainRegistry] = None, domain_config: Optional[Dict[str, AllowDenyPattern]] = None, name: Optional[str] = None, @@ -113,7 +113,7 @@ def gen_domain_urn( def gen_database_container( database: str, - database_container_key: PlatformKey, + database_container_key: ContainerKey, sub_types: List[str], domain_config: Optional[Dict[str, AllowDenyPattern]] = None, domain_registry: Optional[DomainRegistry] = None, @@ -152,7 +152,7 @@ def gen_database_container( def add_table_to_schema_container( dataset_urn: str, - parent_container_key: PlatformKey, + parent_container_key: ContainerKey, ) -> Iterable[MetadataWorkUnit]: yield from add_dataset_to_container( container_key=parent_container_key, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py index f105829d874de..d9062cef06eae 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py @@ -7,7 +7,7 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.validate_field_rename import pydantic_renamed_field -from datahub.emitter.mcp_builder import PlatformKey +from datahub.emitter.mcp_builder import ContainerKey from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.sql.sql_common import SQLAlchemySource, logger from datahub.ingestion.source.sql.sql_config import ( @@ -56,7 +56,7 @@ def __init__(self, config, ctx, platform): super().__init__(config, ctx, platform) self.config: TwoTierSQLAlchemyConfig = config - def get_database_container_key(self, db_name: str, schema: str) -> PlatformKey: + def get_database_container_key(self, db_name: str, schema: str) -> ContainerKey: # Because our overridden get_allowed_schemas method returns db_name as the schema name, # the db_name and schema here will be the same. Hence, we just ignore the schema parameter. assert db_name == schema @@ -72,7 +72,7 @@ def add_table_to_schema_container( dataset_urn: str, db_name: str, schema: str, - schema_container_key: Optional[PlatformKey] = None, + schema_container_key: Optional[ContainerKey] = None, ) -> Iterable[MetadataWorkUnit]: yield from add_table_to_schema_container( dataset_urn=dataset_urn, @@ -86,7 +86,7 @@ def get_allowed_schemas( # dbName itself as an allowed schema yield db_name - def gen_schema_key(self, db_name: str, schema: str) -> PlatformKey: + def gen_schema_key(self, db_name: str, schema: str) -> ContainerKey: # Sanity check that we don't try to generate schema containers for 2 tier databases. raise NotImplementedError diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 5ad39425c3f73..67bd1af6c2d7f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -33,7 +33,7 @@ ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( - PlatformKey, + ContainerKey, add_entity_to_container, gen_containers, ) @@ -358,11 +358,11 @@ def projects_backward_compatibility(cls, values: Dict) -> Dict: return values -class WorkbookKey(PlatformKey): +class WorkbookKey(ContainerKey): workbook_id: str -class ProjectKey(PlatformKey): +class ProjectKey(ContainerKey): project_id: str @@ -1682,7 +1682,7 @@ def emit_datasource( ) def _get_datasource_container_key(self, datasource, workbook, is_embedded_ds): - container_key: Optional[PlatformKey] = None + container_key: Optional[ContainerKey] = None if is_embedded_ds: # It is embedded then parent is container is workbook if workbook is not None: container_key = self.gen_workbook_key(workbook) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 9d82a9e247a00..ec7d00c7bcc63 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -15,8 +15,8 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( CatalogKey, + ContainerKey, MetastoreKey, - PlatformKey, UnitySchemaKey, add_dataset_to_container, gen_containers, @@ -432,7 +432,7 @@ def gen_catalog_containers(self, catalog: Catalog) -> Iterable[MetadataWorkUnit] external_url=f"{self.external_url_base}/{catalog.name}", ) - def gen_schema_key(self, schema: Schema) -> PlatformKey: + def gen_schema_key(self, schema: Schema) -> ContainerKey: return UnitySchemaKey( unity_schema=schema.name, platform=self.platform, diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_fail_api_ingest.json b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_fail_api_ingest.json new file mode 100644 index 0000000000000..69a567654cac1 --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_fail_api_ingest.json @@ -0,0 +1,326 @@ +[ +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "TEST_USER" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testa" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testa", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testa" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938a" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "TEST_USER" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testd" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testd", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testd" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938d" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json index 9b202baa947d5..f4277e41d58c7 100644 --- a/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json @@ -5,8 +5,12 @@ "changeType": "UPSERT", "aspectName": "corpUserInfo", "aspect": { - "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", - "contentType": "application/json" + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -19,8 +23,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -33,8 +38,9 @@ "changeType": "UPSERT", "aspectName": "corpUserKey", "aspect": { - "value": "{\"username\": \"TEST_USER\"}", - "contentType": "application/json" + "json": { + "username": "TEST_USER" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -47,8 +53,11 @@ "changeType": "UPSERT", "aspectName": "browsePaths", "aspect": { - "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testa\"]}", - "contentType": "application/json" + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testa" + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -61,8 +70,32 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testa\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testa\"}", - "contentType": "application/json" + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testa", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testa" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -75,8 +108,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -89,8 +123,10 @@ "changeType": "UPSERT", "aspectName": "dashboardKey", "aspect": { - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938a\"}", - "contentType": "application/json" + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938a" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -103,8 +139,22 @@ "changeType": "UPSERT", "aspectName": "ownership", "aspect": { - "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", - "contentType": "application/json" + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -117,8 +167,12 @@ "changeType": "UPSERT", "aspectName": "corpUserInfo", "aspect": { - "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", - "contentType": "application/json" + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -131,8 +185,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -145,78 +200,9 @@ "changeType": "UPSERT", "aspectName": "corpUserKey", "aspect": { - "value": "{\"username\": \"TEST_USER\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", - "aspectName": "browsePaths", - "aspect": { - "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testb\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", - "aspectName": "dashboardInfo", - "aspect": { - "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testb\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testb\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", - "aspectName": "dashboardKey", - "aspect": { - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938b\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", - "contentType": "application/json" + "json": { + "username": "TEST_USER" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -229,8 +215,12 @@ "changeType": "UPSERT", "aspectName": "corpUserInfo", "aspect": { - "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", - "contentType": "application/json" + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -243,8 +233,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -257,8 +248,9 @@ "changeType": "UPSERT", "aspectName": "corpUserKey", "aspect": { - "value": "{\"username\": \"TEST_USER\"}", - "contentType": "application/json" + "json": { + "username": "TEST_USER" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -271,8 +263,11 @@ "changeType": "UPSERT", "aspectName": "browsePaths", "aspect": { - "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testc\"]}", - "contentType": "application/json" + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testc" + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -285,8 +280,32 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testc\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testc\"}", - "contentType": "application/json" + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testc", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testc" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -299,8 +318,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -313,8 +333,10 @@ "changeType": "UPSERT", "aspectName": "dashboardKey", "aspect": { - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938c\"}", - "contentType": "application/json" + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938c" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -327,8 +349,22 @@ "changeType": "UPSERT", "aspectName": "ownership", "aspect": { - "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", - "contentType": "application/json" + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -341,8 +377,12 @@ "changeType": "UPSERT", "aspectName": "corpUserInfo", "aspect": { - "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", - "contentType": "application/json" + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -355,8 +395,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -369,8 +410,9 @@ "changeType": "UPSERT", "aspectName": "corpUserKey", "aspect": { - "value": "{\"username\": \"TEST_USER\"}", - "contentType": "application/json" + "json": { + "username": "TEST_USER" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -383,8 +425,11 @@ "changeType": "UPSERT", "aspectName": "browsePaths", "aspect": { - "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testd\"]}", - "contentType": "application/json" + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testd" + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -397,8 +442,32 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testd\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testd\"}", - "contentType": "application/json" + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testd", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testd" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -411,8 +480,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -425,8 +495,10 @@ "changeType": "UPSERT", "aspectName": "dashboardKey", "aspect": { - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938d\"}", - "contentType": "application/json" + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938d" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -439,8 +511,22 @@ "changeType": "UPSERT", "aspectName": "ownership", "aspect": { - "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", - "contentType": "application/json" + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } }, "systemMetadata": { "lastObserved": 1643871600000, diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py b/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py index c03190be66964..826c2b77bce36 100644 --- a/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py +++ b/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py @@ -21,7 +21,7 @@ def mock_user_to_add(*args, **kwargs): return None -def register_mock_api(request_mock): +def register_mock_api(request_mock, override_mock_data={}): api_vs_response = { "https://host_port/Reports/api/v2.0/Reports": { "method": "GET", @@ -52,37 +52,6 @@ def register_mock_api(request_mock): ] }, }, - "https://host_port/Reports/api/v2.0/MobileReports": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "Id": "ee56dc21-248a-4138-a446-ee5ab1fc938b", - "Name": "Testb", - "Description": None, - "Path": "/path/to/Testb", - "Type": "MobileReport", - "Hidden": False, - "Size": 1010101, - "ModifiedBy": "TEST_USER", - "ModifiedDate": str(datetime.now()), - "CreatedBy": "TEST_USER", - "CreatedDate": str(datetime.now()), - "ParentFolderId": "47495172-89ab-455f-a446-fffd3cf239cb", - "IsFavorite": False, - "ContentType": None, - "Content": "", - "HasDataSources": True, - "Roles": [], - "HasSharedDataSets": True, - "HasParameters": True, - "AllowCaching": True, - "Manifest": {"Resources": []}, - }, - ] - }, - }, "https://host_port/Reports/api/v2.0/LinkedReports": { "method": "GET", "status_code": 200, @@ -141,6 +110,8 @@ def register_mock_api(request_mock): }, } + api_vs_response.update(override_mock_data) + for url in api_vs_response.keys(): request_mock.register_uri( api_vs_response[url]["method"], @@ -164,6 +135,30 @@ def default_source_config(): } +def get_default_recipe(output_path: str) -> dict: + return { + "run_id": "powerbi-report-server-test", + "source": { + "type": "powerbi-report-server", + "config": { + **default_source_config(), + }, + }, + "sink": { + "type": "file", + "config": {"filename": output_path}, # , + }, + } + + +def add_mock_method_in_pipeline(pipeline: Pipeline) -> None: + pipeline.ctx.graph = mock.MagicMock() + pipeline.ctx.graph.get_ownership = mock.MagicMock() + pipeline.ctx.graph.get_ownership.side_effect = mock_existing_users + pipeline.ctx.graph.get_aspect_v2 = mock.MagicMock() + pipeline.ctx.graph.get_aspect_v2.side_effect = mock_user_to_add + + @freeze_time(FROZEN_TIME) @mock.patch("requests_ntlm.HttpNtlmAuth") def test_powerbi_ingest(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock): @@ -174,34 +169,54 @@ def test_powerbi_ingest(mock_msal, pytestconfig, tmp_path, mock_time, requests_m register_mock_api(request_mock=requests_mock) pipeline = Pipeline.create( - { - "run_id": "powerbi-report-server-test", - "source": { - "type": "powerbi-report-server", - "config": { - **default_source_config(), - }, - }, - "sink": { - "type": "file", - "config": { - "filename": f"{tmp_path}/powerbi_report_server_mces.json", - }, - }, - } + get_default_recipe(output_path=f"{tmp_path}/powerbi_report_server_mces.json") ) - pipeline.ctx.graph = mock.MagicMock() - pipeline.ctx.graph.get_ownership = mock.MagicMock() - pipeline.ctx.graph.get_ownership.side_effect = mock_existing_users - pipeline.ctx.graph.get_aspect_v2 = mock.MagicMock() - pipeline.ctx.graph.get_aspect_v2.side_effect = mock_user_to_add + + add_mock_method_in_pipeline(pipeline=pipeline) + + pipeline.run() + pipeline.raise_from_status() + + golden_file = "golden_test_ingest.json" + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "powerbi_report_server_mces.json", + golden_path=f"{test_resources_dir}/{golden_file}", + ) + + +@freeze_time(FROZEN_TIME) +@mock.patch("requests_ntlm.HttpNtlmAuth") +def test_powerbi_ingest_with_failure( + mock_msal, pytestconfig, tmp_path, mock_time, requests_mock +): + test_resources_dir = ( + pytestconfig.rootpath / "tests/integration/powerbi_report_server" + ) + + register_mock_api( + request_mock=requests_mock, + override_mock_data={ + "https://host_port/Reports/api/v2.0/LinkedReports": { + "method": "GET", + "status_code": 404, + "json": {"error": "Request Failed"}, + } + }, + ) + + pipeline = Pipeline.create( + get_default_recipe(output_path=f"{tmp_path}/powerbi_report_server_mces.json") + ) + + add_mock_method_in_pipeline(pipeline=pipeline) pipeline.run() pipeline.raise_from_status() - mce_out_file = "golden_test_ingest.json" + golden_file = "golden_test_fail_api_ingest.json" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / "powerbi_report_server_mces.json", - golden_path=f"{test_resources_dir}/{mce_out_file}", + golden_path=f"{test_resources_dir}/{golden_file}", ) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java index 804ab3303f6b0..56f6fed3ad9d2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java @@ -154,8 +154,8 @@ public SearchResult getCachedSearchResults( batchSize, querySize -> getRawSearchResults(entityName, query, filters, sortCriterion, querySize.getFrom(), querySize.getSize(), flags, facets), - querySize -> Quintet.with(entityName, query, filters != null ? toJsonString(filters) : null, - sortCriterion != null ? toJsonString(sortCriterion) : null, querySize), flags, enableCache).getSearchResults(from, size); + querySize -> Sextet.with(entityName, query, filters != null ? toJsonString(filters) : null, + sortCriterion != null ? toJsonString(sortCriterion) : null, facets, querySize), flags, enableCache).getSearchResults(from, size); }