diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java index 41a1d22485ea4..76abddc9a99a9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java @@ -49,6 +49,8 @@ public CompletableFuture get(DataFetchingEnvironment environmen final int start = input.getStart() != null ? input.getStart() : DEFAULT_START; final int count = input.getCount() != null ? input.getCount() : DEFAULT_COUNT; final String query = input.getQuery() != null ? input.getQuery() : "*"; + // escape forward slash since it is a reserved character in Elasticsearch + final String sanitizedQuery = ResolverUtils.escapeForwardSlash(query); return CompletableFuture.supplyAsync(() -> { try { @@ -64,7 +66,7 @@ public CompletableFuture get(DataFetchingEnvironment environmen maybeResolvedView != null ? SearchUtils.combineFilters(filter, maybeResolvedView.getDefinition().getFilter()) : filter, - query, + sanitizedQuery, start, count, context.getAuthentication() diff --git a/datahub-web-react/src/app/home/AcrylDemoBanner.tsx b/datahub-web-react/src/app/home/AcrylDemoBanner.tsx index 87efae03e0a7c..0a6316a71db16 100644 --- a/datahub-web-react/src/app/home/AcrylDemoBanner.tsx +++ b/datahub-web-react/src/app/home/AcrylDemoBanner.tsx @@ -33,13 +33,17 @@ const StyledLink = styled(Link)` font-weight: 700; `; +const TextContent = styled.div` + max-width: 1025px; +`; + export default function AcrylDemoBanner() { return ( - Schedule a Demo of Managed Datahub - + Schedule a Demo of Managed DataHub + DataHub is already the industry's #1 Open Source Data Catalog.{' '} Schedule a demo {' '} - of Acryl Cloud to see the advanced features that take it to the next level! 
- + of Acryl DataHub to see the advanced features that take it to the next level or purchase Acryl Cloud + on{' '} + + AWS Marketplace + + ! + ); diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index b5ffd1964d7c1..7d651fd5d1894 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -486,6 +486,7 @@ module.exports = { "docs/how/add-custom-ingestion-source", "docs/how/add-custom-data-platform", "docs/advanced/browse-paths-upgrade", + "docs/browseV2/browse-paths-v2", ], }, ], diff --git a/docs/browseV2/browse-paths-v2.md b/docs/browseV2/browse-paths-v2.md new file mode 100644 index 0000000000000..b1f63b4a182ea --- /dev/null +++ b/docs/browseV2/browse-paths-v2.md @@ -0,0 +1,51 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# Generating Browse Paths (V2) + + + +## Introduction + +Browse (V2) is a way for users to explore and dive deeper into their data. Its integration with the search experience allows users to combine search queries and filters with entity type and platform nested folders. + +Most entities should have a browse path that allows users to navigate the left side panel on the search page to find groups of entities under different folders that come from these browse paths. Below, you can see an example of the sidebar with some new browse paths. + +

+ +

+ +This new browse sidebar always starts with Entity Type, then optionally shows Environment (PROD, DEV, etc.) if there are 2 or more Environments, then Platform. Below the Platform level, we render out folders that come directly from the entity's [browsePathsV2](https://datahubproject.io/docs/generated/metamodel/entities/dataset#browsepathsv2) aspects. + +## Generating Custom Browse Paths + +A `browsePathsV2` aspect has a field called `path` which contains a list of `BrowsePathEntry` objects. Each object in the path represents one level of the entity's browse path where the first entry is the highest level and the last entry is the lowest level. + +If an entity has this aspect filled out, its browse path will show up in the browse sidebar so that you can navigate its folders and select one to filter search results down. + +For example, in the browse sidebar on the left of the image above, there are 10 Dataset entities from the BigQuery Platform that have `browsePathsV2` aspects that look like the following: + +``` +[ { id: "bigquery-public-data" }, { id: "covid19_public_forecasts" } ] +``` + +The `id` in a `BrowsePathEntry` is required and is what will be shown in the UI unless the optional `urn` field is populated. If the `urn` field is populated, we will try to resolve this path entry into an entity object and display that entity's name. We will also show a link to allow you to open up the entity profile. + +The `urn` field should only be populated if there is an entity in your DataHub instance that belongs in that entity's browse path. This makes the most sense for Datasets that have Container entities in their browse paths, as well as for some other cases such as a DataFlow being part of a DataJob's browse path. For any other situation, feel free to leave `urn` empty and populate `id` with the text you want to be shown in the UI for your entity's path. 
+ +## Additional Resources + +### GraphQL + +* [browseV2](../../graphql/queries.md#browsev2) + +## FAQ and Troubleshooting + +**How are browsePathsV2 aspects created?** + +We create `browsePathsV2` aspects for all entities that should have one by default when you ingest your data if this aspect is not already provided. This happens based on separator characters that appear within an Urn. + +Our ingestion sources are also producing `browsePathsV2` aspects since CLI version v0.10.5. + +### Related Features + +* [Search](../how/search.md) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 03b3d763ed247..ad12aacd00339 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -15,11 +15,14 @@ This file documents any backwards-incompatible changes in DataHub and assists pe certain column-level metrics. Instead, set `profile_table_level_only` to `false` and individually enable / disable desired field metrics. - #8451: The `bigquery-beta` and `snowflake-beta` source aliases have been dropped. Use `bigquery` and `snowflake` as the source type instead. +- #8472: Ingestion runs created with Pipeline.create will show up in the DataHub ingestion tab as CLI-based runs. To revert to the previous behavior of not showing these runs in DataHub, pass `no_default_report=True`. ### Potential Downtime ### Deprecations +- #8198: In the Python SDK, the `PlatformKey` class has been renamed to `ContainerKey`. 
+ ### Other notable Changes ## 0.10.4 diff --git a/metadata-ingestion/examples/library/create_mlmodel.py b/metadata-ingestion/examples/library/create_mlmodel.py index c324f8a820639..630e682eff842 100644 --- a/metadata-ingestion/examples/library/create_mlmodel.py +++ b/metadata-ingestion/examples/library/create_mlmodel.py @@ -31,6 +31,16 @@ description="my feature", groups=model_group_urns, mlFeatures=feature_urns, + trainingMetrics=[ + models.MLMetricClass( + name="accuracy", description="accuracy of the model", value="1.0" + ) + ], + hyperParams=[ + models.MLHyperParamClass( + name="hyper_1", description="hyper_1", value="0.102" + ) + ], ), ) diff --git a/metadata-ingestion/src/datahub/cli/check_cli.py b/metadata-ingestion/src/datahub/cli/check_cli.py index bec1672264b88..f20272ecd9dbf 100644 --- a/metadata-ingestion/src/datahub/cli/check_cli.py +++ b/metadata-ingestion/src/datahub/cli/check_cli.py @@ -61,7 +61,8 @@ def metadata_file(json_file: str, rewrite: bool, unpack_mces: bool) -> None: "type": "file", "config": {"filename": out_file.name}, }, - } + }, + no_default_report=True, ) pipeline.run() diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py index e2b7b2a2e1ff4..918f610ce4635 100644 --- a/metadata-ingestion/src/datahub/cli/docker_cli.py +++ b/metadata-ingestion/src/datahub/cli/docker_cli.py @@ -985,7 +985,7 @@ def ingest_sample_data(path: Optional[str], token: Optional[str]) -> None: if token is not None: recipe["sink"]["config"]["token"] = token - pipeline = Pipeline.create(recipe) + pipeline = Pipeline.create(recipe, no_default_report=True) pipeline.run() ret = pipeline.pretty_print_summary() sys.exit(ret) diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index c8c352d1f83ff..72c15e92257aa 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -253,7 +253,7 @@ def 
mcps(path: str) -> None: }, } - pipeline = Pipeline.create(recipe) + pipeline = Pipeline.create(recipe, no_default_report=True) pipeline.run() ret = pipeline.pretty_print_summary() sys.exit(ret) diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index 9c44949741297..47727d5784a19 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -6,7 +6,17 @@ import time from enum import Enum from hashlib import md5 -from typing import Any, List, Optional, Type, TypeVar, Union, cast, get_type_hints +from typing import ( + TYPE_CHECKING, + Any, + List, + Optional, + Type, + TypeVar, + Union, + cast, + get_type_hints, +) import typing_inspect @@ -50,6 +60,9 @@ os.getenv("DATAHUB_DATASET_URN_TO_LOWER", "false") == "true" ) +if TYPE_CHECKING: + from datahub.emitter.mcp_builder import DatahubKey + # TODO: Delete this once lower-casing is the standard. def set_dataset_urn_to_lower(value: bool) -> None: @@ -132,7 +145,11 @@ def dataset_key_to_urn(key: DatasetKeyClass) -> str: ) -def make_container_urn(guid: str) -> str: +def make_container_urn(guid: Union[str, "DatahubKey"]) -> str: + from datahub.emitter.mcp_builder import DatahubKey + + if isinstance(guid, DatahubKey): + guid = guid.guid() return f"urn:li:container:{guid}" diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 9051f2e82fa1f..40df214f49433 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -54,7 +54,9 @@ def guid(self) -> str: return _stable_guid_from_dict(bag) -class PlatformKey(DatahubKey): +class ContainerKey(DatahubKey): + """Base class for container guid keys. 
Most users should use one of the subclasses instead.""" + platform: str instance: Optional[str] = None @@ -81,8 +83,15 @@ def guid_dict(self) -> Dict[str, str]: def property_dict(self) -> Dict[str, str]: return self.dict(by_alias=True, exclude_none=True) + def as_urn(self) -> str: + return make_container_urn(guid=self.guid()) + + +# DEPRECATION: Keeping the `PlatformKey` name around for backwards compatibility. +PlatformKey = ContainerKey + -class DatabaseKey(PlatformKey): +class DatabaseKey(ContainerKey): database: str @@ -90,11 +99,11 @@ class SchemaKey(DatabaseKey): db_schema: str = Field(alias="schema") -class ProjectIdKey(PlatformKey): +class ProjectIdKey(ContainerKey): project_id: str -class MetastoreKey(PlatformKey): +class MetastoreKey(ContainerKey): metastore: str @@ -110,11 +119,11 @@ class BigQueryDatasetKey(ProjectIdKey): dataset_id: str -class FolderKey(PlatformKey): +class FolderKey(ContainerKey): folder_abs_path: str -class BucketKey(PlatformKey): +class BucketKey(ContainerKey): bucket_name: str @@ -127,7 +136,7 @@ def default(self, obj: Any) -> Any: return json.JSONEncoder.default(self, obj) -KeyType = TypeVar("KeyType", bound=PlatformKey) +KeyType = TypeVar("KeyType", bound=ContainerKey) def add_domain_to_entity_wu( @@ -188,7 +197,7 @@ def gen_containers( container_key: KeyType, name: str, sub_types: List[str], - parent_container_key: Optional[PlatformKey] = None, + parent_container_key: Optional[ContainerKey] = None, extra_properties: Optional[Dict[str, str]] = None, domain_urn: Optional[str] = None, description: Optional[str] = None, @@ -199,9 +208,7 @@ def gen_containers( created: Optional[int] = None, last_modified: Optional[int] = None, ) -> Iterable[MetadataWorkUnit]: - container_urn = make_container_urn( - guid=container_key.guid(), - ) + container_urn = container_key.as_urn() yield MetadataChangeProposalWrapper( entityUrn=f"{container_urn}", # entityKeyAspect=ContainerKeyClass(guid=parent_container_key.guid()), diff --git 
a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index fa037f8d7328b..0bcc220cad49b 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -251,7 +251,7 @@ def _get_browse_path_processor(self, dry_run: bool) -> MetadataWorkUnitProcessor platform_instance: Optional[str] = None if isinstance(config, PlatformInstanceConfigMixin) and config.platform_instance: - platform_instance = platform_instance + platform_instance = config.platform_instance return partial( auto_browse_path_v2, diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 2f817ee69a637..de8b28d4b95a8 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -57,12 +57,12 @@ class DatahubClientConfig(ConfigModel): """Configuration class for holding connectivity to datahub gms""" server: str = "http://localhost:8080" - token: Optional[str] - timeout_sec: Optional[int] - retry_status_codes: Optional[List[int]] - retry_max_times: Optional[int] - extra_headers: Optional[Dict[str, str]] - ca_certificate_path: Optional[str] + token: Optional[str] = None + timeout_sec: Optional[int] = None + retry_status_codes: Optional[List[int]] = None + retry_max_times: Optional[int] = None + extra_headers: Optional[Dict[str, str]] = None + ca_certificate_path: Optional[str] = None disable_ssl_verification: bool = False _max_threads_moved_to_sink = pydantic_removed_field( @@ -88,6 +88,12 @@ class RemovedStatusFilter(enum.Enum): """Search only soft-deleted entities.""" +@dataclass +class RelatedEntity: + urn: str + relationship_type: str + + def _graphql_entity_type(entity_type: str) -> str: """Convert the entity types into GraphQL "EntityType" enum values.""" @@ -769,11 +775,6 @@ class RelationshipDirection(str, enum.Enum): INCOMING 
= "INCOMING" OUTGOING = "OUTGOING" - @dataclass - class RelatedEntity: - urn: str - relationship_type: str - def get_related_entities( self, entity_urn: str, @@ -794,7 +795,7 @@ def get_related_entities( }, ) for related_entity in response.get("entities", []): - yield DataHubGraph.RelatedEntity( + yield RelatedEntity( urn=related_entity["urn"], relationship_type=related_entity["relationshipType"], ) diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index 7fe39ef3e64c6..79d959965e0dd 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -328,7 +328,7 @@ def create( dry_run: bool = False, preview_mode: bool = False, preview_workunits: int = 10, - report_to: Optional[str] = None, + report_to: Optional[str] = "datahub", no_default_report: bool = False, raw_config: Optional[dict] = None, ) -> "Pipeline": diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py index f360b503b640f..6f6e8bbc05661 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py @@ -11,7 +11,6 @@ support_status, ) from datahub.ingestion.api.source import Source -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.aws.sagemaker_processors.common import ( SagemakerSourceConfig, @@ -57,9 +56,6 @@ def create(cls, config_dict, ctx): config = SagemakerSourceConfig.parse_obj(config_dict) return cls(config, ctx) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: # get common lineage graph lineage_processor = 
LineageProcessor( diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 919c803222066..ccda00ba293ef 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -20,7 +20,7 @@ set_dataset_urn_to_lower, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.emitter.mcp_builder import BigQueryDatasetKey, PlatformKey, ProjectIdKey +from datahub.emitter.mcp_builder import BigQueryDatasetKey, ContainerKey, ProjectIdKey from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, @@ -434,7 +434,7 @@ def get_dataplatform_instance_aspect( entityUrn=dataset_urn, aspect=aspect ).as_workunit() - def gen_dataset_key(self, db_name: str, schema: str) -> PlatformKey: + def gen_dataset_key(self, db_name: str, schema: str) -> ContainerKey: return BigQueryDatasetKey( project_id=db_name, dataset_id=schema, @@ -443,7 +443,7 @@ def gen_dataset_key(self, db_name: str, schema: str) -> PlatformKey: backcompat_env_as_instance=True, ) - def gen_project_id_key(self, database: str) -> PlatformKey: + def gen_project_id_key(self, database: str) -> ContainerKey: return ProjectIdKey( project_id=database, platform=self.platform, diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py index 1e9879f599731..e41c02b462662 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py +++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py @@ -17,7 +17,6 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from 
datahub.ingestion.source_config.csv_enricher import CSVEnricherConfig from datahub.metadata.schema_classes import ( @@ -590,9 +589,6 @@ def maybe_extract_owners( ] return owners - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: # As per https://stackoverflow.com/a/49150749/5004662, we want to use # the 'utf-8-sig' encoding to handle any BOM character that may be diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py index 0a65537772390..b04718a9eabba 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py @@ -3,9 +3,9 @@ from datahub.emitter.mcp_builder import ( BucketKey, + ContainerKey, FolderKey, KeyType, - PlatformKey, add_dataset_to_container, gen_containers, ) @@ -45,7 +45,7 @@ def create_emit_containers( container_key: KeyType, name: str, sub_types: List[str], - parent_container_key: Optional[PlatformKey] = None, + parent_container_key: Optional[ContainerKey] = None, domain_urn: Optional[str] = None, ) -> Iterable[MetadataWorkUnit]: if container_key.guid() not in self.processed_containers: diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py index 0c0cbaf3b4803..180ef00459214 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py @@ -21,7 +21,6 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit 
from datahub.ingestion.source.aws.s3_boto_utils import get_s3_tags from datahub.ingestion.source.aws.s3_util import ( @@ -340,9 +339,6 @@ def local_get_folders(self, path: str) -> Iterable[str]: for folder in os.listdir(path): yield os.path.join(path, folder) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.container_WU_creator = ContainerWUCreator( self.source_config.platform, diff --git a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py index ab6aa18dac4d2..b9165ce0ed160 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py +++ b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py @@ -33,7 +33,6 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.common.subtypes import DatasetSubTypes from datahub.metadata.com.linkedin.pegasus2avro.common import StatusClass @@ -343,7 +342,7 @@ def __init__(self, config: ElasticsearchSourceConfig, ctx: PipelineContext): self.report = ElasticsearchSourceReport() self.data_stream_partition_count: Dict[str, int] = defaultdict(int) self.platform: str = "elasticsearch" - self.profiling_info: Dict[str, DatasetProfileClass] = {} + self.cat_response: Optional[List[Dict[str, Any]]] = None @classmethod def create( @@ -352,12 +351,8 @@ def create( config = ElasticsearchSourceConfig.parse_obj(config_dict) return cls(config, ctx) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: indices = self.client.indices.get_alias() - for index 
in indices: self.report.report_index_scanned(index) @@ -366,12 +361,6 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: yield mcp.as_workunit() else: self.report.report_dropped(index) - for urn, profiling_info in self.profiling_info.items(): - yield MetadataChangeProposalWrapper( - entityUrn=urn, - aspect=profiling_info, - ).as_workunit() - self.profiling_info = {} for mcp in self._get_data_stream_index_count_mcps(): yield mcp.as_workunit() @@ -523,36 +512,44 @@ def _extract_mcps( ) if self.source_config.profiling.enabled: - cat_response = self.client.cat.indices( - index=index, params={"format": "json", "bytes": "b"} + if self.cat_response is None: + self.cat_response = self.client.cat.indices( + params={ + "format": "json", + "bytes": "b", + "h": "index,docs.count,store.size", + } + ) + if self.cat_response is None: + return + for item in self.cat_response: + item["index"] = collapse_name( + name=item["index"], + collapse_urns=self.source_config.collapse_urns, + ) + + profile_info_current = list( + filter(lambda x: x["index"] == collapsed_index_name, self.cat_response) ) - if len(cat_response) == 1: - index_res = cat_response[0] - docs_count = int(index_res["docs.count"]) - size = int(index_res["store.size"]) - if len(self.source_config.collapse_urns.urns_suffix_regex) > 0: - if dataset_urn not in self.profiling_info: - self.profiling_info[dataset_urn] = DatasetProfileClass( - timestampMillis=int(time.time() * 1000), - rowCount=docs_count, - columnCount=len(schema_fields), - sizeInBytes=size, - ) - else: - existing_profile = self.profiling_info[dataset_urn] - if existing_profile.rowCount is not None: - docs_count = docs_count + existing_profile.rowCount - if existing_profile.sizeInBytes is not None: - size = size + existing_profile.sizeInBytes - self.profiling_info[dataset_urn] = DatasetProfileClass( - timestampMillis=int(time.time() * 1000), - rowCount=docs_count, - columnCount=len(schema_fields), - sizeInBytes=size, - ) - else: - 
logger.warning( - "Unexpected response from cat response with multiple rows" + if len(profile_info_current) > 0: + self.cat_response = list( + filter( + lambda x: x["index"] != collapsed_index_name, self.cat_response + ) + ) + row_count = 0 + size_in_bytes = 0 + for profile_info in profile_info_current: + row_count += int(profile_info["docs.count"]) + size_in_bytes += int(profile_info["store.size"]) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=DatasetProfileClass( + timestampMillis=int(time.time() * 1000), + rowCount=row_count, + columnCount=len(schema_fields), + sizeInBytes=size_in_bytes, + ), ) def get_report(self): diff --git a/metadata-ingestion/src/datahub/ingestion/source/feast.py b/metadata-ingestion/src/datahub/ingestion/source/feast.py index c4219fdc544f3..8faba7d113372 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/feast.py +++ b/metadata-ingestion/src/datahub/ingestion/source/feast.py @@ -1,7 +1,5 @@ import sys -from datahub.ingestion.api.source_helpers import auto_workunit_reporter - if sys.version_info < (3, 8): raise ImportError("Feast is only supported on Python 3.8+") @@ -370,9 +368,6 @@ def create(cls, config_dict, ctx): config = FeastRepositorySourceConfig.parse_obj(config_dict) return cls(config, ctx) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: for feature_view in self.feature_store.list_feature_views(): for entity_name in feature_view.entities: diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index 4ea721f6fd0cc..6faa29f264d36 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -360,18 +360,22 @@ def _get_column_cardinality( 
@_run_with_query_combiner def _get_dataset_rows(self, dataset_profile: DatasetProfileClass) -> None: if self.config.profile_table_row_count_estimate_only: - schema_name = self.dataset_name.split(".")[1] - table_name = self.dataset_name.split(".")[2] - logger.debug( - f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}" - ) - dialect_name = self.dataset.engine.dialect.name.lower() if dialect_name == "postgresql": + schema_name = self.dataset_name.split(".")[1] + table_name = self.dataset_name.split(".")[2] + logger.debug( + f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}" + ) get_estimate_script = sa.text( f"SELECT c.reltuples AS estimate FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE c.relname = '{table_name}' AND n.nspname = '{schema_name}'" ) elif dialect_name == "mysql": + schema_name = self.dataset_name.split(".")[0] + table_name = self.dataset_name.split(".")[1] + logger.debug( + f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}" + ) get_estimate_script = sa.text( f"SELECT table_rows AS estimate FROM information_schema.tables WHERE table_schema = '{schema_name}' AND table_name = '{table_name}'" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index 17b53cb64caf6..0a6d8c605688a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -21,7 +21,6 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.com.linkedin.pegasus2avro.common import ( AuditStamp, @@ -611,9 +610,6 @@ def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: 
config = MetabaseConfig.parse_obj(config_dict) return cls(ctx, config) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: yield from self.emit_card_mces() yield from self.emit_dashboard_mces() diff --git a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py index 53822994506e3..b5d9d96354fc5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py @@ -20,11 +20,7 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import ( - auto_status_aspect, - auto_workunit, - auto_workunit_reporter, -) +from datahub.ingestion.api.source_helpers import auto_workunit from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.graph.client import DataHubGraph from datahub.utilities.registries.domain_registry import DomainRegistry @@ -503,14 +499,6 @@ def load_glossary_config( glossary_cfg = BusinessGlossaryConfig.parse_obj(config) return glossary_cfg - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter( - self.report, - auto_status_aspect( - self.get_workunits_internal(), - ), - ) - def get_workunits_internal( self, ) -> Iterable[MetadataWorkUnit]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py index 207e607ed14ca..1c0c809c16a60 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py @@ -1,5 +1,6 @@ import logging from dataclasses import dataclass, field +from functools import partial 
from typing import Any, Dict, Iterable, List, Optional from pydantic import validator @@ -26,8 +27,11 @@ platform_name, support_status, ) -from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter +from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport +from datahub.ingestion.api.source_helpers import ( + auto_status_aspect, + auto_workunit_reporter, +) from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( FineGrainedLineageDownstreamType, @@ -139,8 +143,11 @@ def load_lineage_config(file_name: str) -> LineageConfig: lineage_config = LineageConfig.parse_obj(config) return lineage_config - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: + return [ + auto_status_aspect, + partial(auto_workunit_reporter, self.get_report()), + ] def get_workunits_internal( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 2bb185619d26c..0cf9932ba0878 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -25,7 +25,6 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.com.linkedin.pegasus2avro.common import ( AuditStamp, @@ -797,9 +796,6 @@ def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: config = ModeConfig.parse_obj(config_dict) return cls(ctx, config) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, 
self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: yield from self.emit_dashboard_mces() yield from self.emit_chart_mces() diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index 53e9094ee8178..f02b6845e40b5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -2,10 +2,6 @@ from dataclasses import dataclass, field from typing import Dict, Iterable, List, Optional, Tuple, Type, Union, ValuesView -import bson -import bson.dbref -import bson.int64 -import bson.objectid import bson.timestamp import pymongo import pymongo.collection @@ -26,7 +22,6 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.schema_inference.object import ( SchemaDescription, @@ -301,9 +296,6 @@ def get_field_type( return SchemaFieldDataType(type=TypeClass()) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: platform = "mongodb" diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index 559d103aa6e5a..ac1e03812db3b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -31,7 +31,6 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.schema_classes import ( DataFlowInfoClass, @@ -1024,9 +1023,6 @@ def 
authenticate(self): token_response.raise_for_status() self.session.headers.update({"Authorization": "Bearer " + token_response.text}) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: try: self.authenticate() diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi.py b/metadata-ingestion/src/datahub/ingestion/source/openapi.py index ad9aec927832b..78570a2a4ceca 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi.py @@ -18,7 +18,6 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.openapi_parser import ( clean_url, @@ -213,9 +212,6 @@ def build_wu( mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot) return ApiWorkUnit(id=dataset_name, mce=mce) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[ApiWorkUnit]: # noqa: C901 config = self.config diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 33596091e420d..919cb83e4d832 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -9,7 +9,7 @@ import datahub.emitter.mce_builder as builder import datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes as powerbi_data_classes from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.emitter.mcp_builder import PlatformKey, gen_containers +from datahub.emitter.mcp_builder import 
ContainerKey, gen_containers from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SourceCapability, @@ -104,7 +104,7 @@ def __init__( self.__reporter = reporter self.__dataplatform_instance_resolver = dataplatform_instance_resolver self.processed_datasets: Set[powerbi_data_classes.PowerBIDataset] = set() - self.workspace_key: PlatformKey + self.workspace_key: ContainerKey @staticmethod def urn_to_lowercase(value: str, flag: bool) -> str: @@ -256,7 +256,6 @@ def to_datahub_schema( self, table: powerbi_data_classes.Table, ) -> SchemaMetadataClass: - fields = [] table_fields = ( [self.to_datahub_schema_field(column) for column in table.columns] diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py index 28a5fac8b127b..2d2d9f527788f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py @@ -2,7 +2,7 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union -from datahub.emitter.mcp_builder import PlatformKey +from datahub.emitter.mcp_builder import ContainerKey from datahub.metadata.schema_classes import ( BooleanTypeClass, DateTypeClass, @@ -28,11 +28,11 @@ } -class WorkspaceKey(PlatformKey): +class WorkspaceKey(ContainerKey): workspace: str -class DatasetKey(PlatformKey): +class DatasetKey(ContainerKey): dataset: str @@ -57,7 +57,7 @@ def get_workspace_key( platform_name: str, platform_instance: Optional[str] = None, workspace_id_as_urn_part: Optional[bool] = False, - ) -> PlatformKey: + ) -> ContainerKey: return WorkspaceKey( workspace=self.get_urn_part(workspace_id_as_urn_part), platform=platform_name, @@ -150,7 +150,7 @@ def __eq__(self, instance): def __hash__(self): return hash(self.__members()) - def 
get_dataset_key(self, platform_name: str) -> PlatformKey: + def get_dataset_key(self, platform_name: str) -> ContainerKey: return DatasetKey( dataset=self.id, platform=platform_name, diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py index 92560a11b90eb..9f409793272dd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py @@ -82,8 +82,6 @@ class Constant: Constant.LINKED_REPORTS: "{PBIRS_BASE_URL}/LinkedReports", Constant.LINKED_REPORT: "{PBIRS_BASE_URL}/LinkedReports({LINKED_REPORT_ID})", Constant.ME: "{PBIRS_BASE_URLL}/Me", - Constant.MOBILE_REPORTS: "{PBIRS_BASE_URL}/MobileReports", - Constant.MOBILE_REPORT: "{PBIRS_BASE_URL}/MobileReports({MOBILE_REPORT_ID})", Constant.POWERBI_REPORTS: "{PBIRS_BASE_URL}/PowerBiReports", Constant.POWERBI_REPORT: "{PBIRS_BASE_URL}/PowerBiReports({POWERBI_REPORT_ID})", Constant.POWERBI_REPORT_DATASOURCES: "{PBIRS_BASE_URL}/PowerBiReports({ID})/DataSources", diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py index 7419241511eaf..80367558251a5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py @@ -26,7 +26,6 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.powerbi_report_server.constants import ( API_ENDPOINTS, @@ -35,7 +34,6 @@ from datahub.ingestion.source.powerbi_report_server.report_server_domain import ( 
CorpUser, LinkedReport, - MobileReport, Owner, OwnershipData, PowerBiReport, @@ -116,6 +114,29 @@ class PowerBiReportServerDashboardSourceConfig(PowerBiReportServerAPIConfig): chart_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() +def log_http_error(e: BaseException, message: str) -> Any: + LOGGER.warning(message) + + if isinstance(e, requests.exceptions.HTTPError): + LOGGER.warning(f"HTTP status-code = {e.response.status_code}") + + LOGGER.debug(msg=message, exc_info=e) + + return e + + +def get_response_dict(response: requests.Response, error_message: str) -> dict: + + result_dict: dict = {} + try: + response.raise_for_status() + result_dict = response.json() + except BaseException as e: + log_http_error(e=e, message=error_message) + + return result_dict + + class PowerBiReportServerAPI: # API endpoints of PowerBI Report Server to fetch reports, datasets @@ -144,14 +165,15 @@ def requests_get(self, url_http: str, url_https: str, content_type: str) -> Any: url=url_http, auth=self.get_auth_credentials, ) - # Check if we got response from PowerBi Report Server - if response.status_code != 200: - message: str = "Failed to fetch Report from powerbi-report-server for" - LOGGER.warning(message) - LOGGER.warning("{}={}".format(Constant.ReportId, content_type)) - raise ValueError(message) - return response.json() + error_message: str = ( + f"Failed to fetch {content_type} Report from powerbi-report-server" + ) + + return get_response_dict( + response=response, + error_message=error_message, + ) def get_all_reports(self) -> List[Any]: """ @@ -159,7 +181,6 @@ def get_all_reports(self) -> List[Any]: """ report_types_mapping: Dict[str, Any] = { Constant.REPORTS: Report, - Constant.MOBILE_REPORTS: MobileReport, Constant.LINKED_REPORTS: LinkedReport, Constant.POWERBI_REPORTS: PowerBiReport, } @@ -174,15 +195,17 @@ def get_all_reports(self) -> List[Any]: report_get_endpoint_https = report_get_endpoint.format( PBIRS_BASE_URL=self.__config.get_base_api_https_url, ) + 
response_dict = self.requests_get( url_http=report_get_endpoint_http, url_https=report_get_endpoint_https, content_type=report_type, - )["value"] - if response_dict: + ) + + if response_dict.get("value"): reports.extend( report_types_mapping[report_type].parse_obj(report) - for report in response_dict + for report in response_dict.get("value") ) return reports @@ -487,7 +510,6 @@ class PowerBiReportServerDashboardSource(Source): Next types of report can be ingested: - PowerBI report(.pbix) - Paginated report(.rdl) - - Mobile report - Linked report """ @@ -510,9 +532,6 @@ def create(cls, config_dict, ctx): config = PowerBiReportServerDashboardSourceConfig.parse_obj(config_dict) return cls(config, ctx) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: """ Datahub Ingestion framework invoke this method diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py index adcbcaaed96e6..60426fc5bd660 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py @@ -239,11 +239,6 @@ class Manifest(BaseModel): resources: List[Dict[str, List]] = Field(alias="Resources") -class MobileReport(CatalogItem): - allow_caching: bool = Field(alias="AllowCaching") - manifest: Manifest = Field(alias="Manifest") - - class PowerBIReport(CatalogItem): has_data_sources: bool = Field(alias="HasDataSources") diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py index bc5225509343b..4019436bda2f0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redash.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/redash.py @@ -24,7 +24,6 @@ ) from datahub.ingestion.api.registry import import_path from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.com.linkedin.pegasus2avro.common import ( AuditStamp, @@ -776,9 +775,6 @@ def _emit_chart_mces(self) -> Iterable[MetadataWorkUnit]: def add_config_to_report(self) -> None: self.report.api_page_limit = self.config.api_page_limit - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.validate_connection() self.add_config_to_report() diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py index 8d35633010a3e..75f9a0fc34708 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py +++ b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py @@ -281,7 +281,7 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: ) ) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: + def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: sObjects = self.get_salesforce_objects() for sObject in sObjects: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index 46f9fd240db04..8b2eed36ac6b3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -9,7 +9,7 @@ from sqlalchemy.engine.reflection import Inspector from datahub.configuration.validate_field_rename import pydantic_renamed_field -from datahub.emitter.mcp_builder import PlatformKey +from datahub.emitter.mcp_builder import 
ContainerKey from datahub.ingestion.api.decorators import ( SourceCapability, SupportStatus, @@ -211,7 +211,7 @@ def gen_schema_containers( extra_properties=extra_properties, ) - def get_database_container_key(self, db_name: str, schema: str) -> PlatformKey: + def get_database_container_key(self, db_name: str, schema: str) -> ContainerKey: # Because our overridden get_allowed_schemas method returns db_name as the schema name, # the db_name and schema here will be the same. Hence, we just ignore the schema parameter. # Based on community feedback, db_name only available if it is explicitly specified in the connection string. @@ -232,7 +232,7 @@ def add_table_to_schema_container( dataset_urn: str, db_name: str, schema: str, - schema_container_key: Optional[PlatformKey] = None, + schema_container_key: Optional[ContainerKey] = None, ) -> Iterable[MetadataWorkUnit]: yield from add_table_to_schema_container( dataset_urn=dataset_urn, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py index 3be9a5df4f0b9..3e89dd53c1eec 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py @@ -5,6 +5,7 @@ from sqlalchemy import util from sqlalchemy.dialects.mysql import base from sqlalchemy.dialects.mysql.enumerated import SET +from sqlalchemy.engine.reflection import Inspector from datahub.ingestion.api.decorators import ( SourceCapability, @@ -83,3 +84,14 @@ def get_platform(self): def create(cls, config_dict, ctx): config = MySQLConfig.parse_obj(config_dict) return cls(config, ctx) + + def add_profile_metadata(self, inspector: Inspector) -> None: + if not self.config.profiling.enabled: + return + with inspector.engine.connect() as conn: + for row in conn.execute( + "SELECT table_schema, table_name, data_length from information_schema.tables" + ): + self.profile_metadata_info.dataset_name_to_storage_bytes[ + 
f"{row.table_schema}.{row.table_name}" + ] = row.data_length diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index fb659d9548540..42ea7aed9b620 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -309,6 +309,15 @@ def get_schema_metadata( ] +@dataclass +class ProfileMetadata: + """ + A class to hold information about the table for profile enrichment + """ + + dataset_name_to_storage_bytes: Dict[str, int] = field(default_factory=dict) + + class SQLAlchemySource(StatefulIngestionSourceBase): """A Base class for all SQL Sources that use SQLAlchemy to extend""" @@ -317,6 +326,7 @@ def __init__(self, config: SQLAlchemyConfig, ctx: PipelineContext, platform: str self.config = config self.platform = platform self.report: SQLSourceReport = SQLSourceReport() + self.profile_metadata_info: ProfileMetadata = ProfileMetadata() config_report = { config_option: config.dict().get(config_option) @@ -484,6 +494,16 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit profile_requests: List["GEProfilerRequest"] = [] if sql_config.profiling.enabled: profiler = self.get_profiler_instance(inspector) + try: + self.add_profile_metadata(inspector) + except Exception as e: + logger.warning( + "Failed to get enrichment data for profiler", exc_info=True + ) + self.report.report_warning( + "profile_metadata", + f"Failed to get enrichment data for profile {e}", + ) db_name = self.get_db_name(inspector) yield from self.gen_database_containers( @@ -1098,6 +1118,13 @@ def loop_profiler_requests( ), ) + def add_profile_metadata(self, inspector: Inspector) -> None: + """ + Method to add profile metadata in a sub-class that can be used to enrich profile metadata. + This is meant to change self.profile_metadata_info in the sub-class. 
+ """ + pass + def loop_profiler( self, profile_requests: List["GEProfilerRequest"], @@ -1113,6 +1140,15 @@ def loop_profiler( if profile is None: continue dataset_name = request.pretty_name + if ( + dataset_name in self.profile_metadata_info.dataset_name_to_storage_bytes + and profile.sizeInBytes is None + ): + profile.sizeInBytes = ( + self.profile_metadata_info.dataset_name_to_storage_bytes[ + dataset_name + ] + ) dataset_urn = make_dataset_urn_with_platform_instance( self.platform, dataset_name, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py index 7554dd5af3103..a5f5034d175c6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py @@ -8,8 +8,8 @@ ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( + ContainerKey, DatabaseKey, - PlatformKey, SchemaKey, add_dataset_to_container, add_domain_to_entity_wu, @@ -28,7 +28,7 @@ def gen_schema_key( platform: str, platform_instance: Optional[str], env: Optional[str], -) -> PlatformKey: +) -> ContainerKey: return SchemaKey( database=db_name, schema=schema, @@ -41,7 +41,7 @@ def gen_schema_key( def gen_database_key( database: str, platform: str, platform_instance: Optional[str], env: Optional[str] -) -> PlatformKey: +) -> ContainerKey: return DatabaseKey( database=database, platform=platform, @@ -55,8 +55,8 @@ def gen_schema_container( schema: str, database: str, sub_types: List[str], - database_container_key: PlatformKey, - schema_container_key: PlatformKey, + database_container_key: ContainerKey, + schema_container_key: ContainerKey, domain_registry: Optional[DomainRegistry] = None, domain_config: Optional[Dict[str, AllowDenyPattern]] = None, name: Optional[str] = None, @@ -113,7 +113,7 @@ def gen_domain_urn( def gen_database_container( database: str, - database_container_key: 
PlatformKey, + database_container_key: ContainerKey, sub_types: List[str], domain_config: Optional[Dict[str, AllowDenyPattern]] = None, domain_registry: Optional[DomainRegistry] = None, @@ -152,7 +152,7 @@ def gen_database_container( def add_table_to_schema_container( dataset_urn: str, - parent_container_key: PlatformKey, + parent_container_key: ContainerKey, ) -> Iterable[MetadataWorkUnit]: yield from add_dataset_to_container( container_key=parent_container_key, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py index f105829d874de..d9062cef06eae 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py @@ -7,7 +7,7 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.validate_field_rename import pydantic_renamed_field -from datahub.emitter.mcp_builder import PlatformKey +from datahub.emitter.mcp_builder import ContainerKey from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.sql.sql_common import SQLAlchemySource, logger from datahub.ingestion.source.sql.sql_config import ( @@ -56,7 +56,7 @@ def __init__(self, config, ctx, platform): super().__init__(config, ctx, platform) self.config: TwoTierSQLAlchemyConfig = config - def get_database_container_key(self, db_name: str, schema: str) -> PlatformKey: + def get_database_container_key(self, db_name: str, schema: str) -> ContainerKey: # Because our overridden get_allowed_schemas method returns db_name as the schema name, # the db_name and schema here will be the same. Hence, we just ignore the schema parameter. 
assert db_name == schema @@ -72,7 +72,7 @@ def add_table_to_schema_container( dataset_urn: str, db_name: str, schema: str, - schema_container_key: Optional[PlatformKey] = None, + schema_container_key: Optional[ContainerKey] = None, ) -> Iterable[MetadataWorkUnit]: yield from add_table_to_schema_container( dataset_urn=dataset_urn, @@ -86,7 +86,7 @@ def get_allowed_schemas( # dbName itself as an allowed schema yield db_name - def gen_schema_key(self, db_name: str, schema: str) -> PlatformKey: + def gen_schema_key(self, db_name: str, schema: str) -> ContainerKey: # Sanity check that we don't try to generate schema containers for 2 tier databases. raise NotImplementedError diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py index ac08593d97e28..37ccc1d1fedb5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py @@ -27,7 +27,6 @@ platform_name, support_status, ) -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.sql.sql_common import ( SQLAlchemySource, @@ -144,9 +143,6 @@ def create(cls, config_dict: Dict, ctx: PipelineContext) -> "VerticaSource": config = VerticaConfig.parse_obj(config_dict) return cls(config, ctx) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: sql_config = self.config if logger.isEnabledFor(logging.DEBUG): diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 5ad39425c3f73..67bd1af6c2d7f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -33,7 +33,7 @@ ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( - PlatformKey, + ContainerKey, add_entity_to_container, gen_containers, ) @@ -358,11 +358,11 @@ def projects_backward_compatibility(cls, values: Dict) -> Dict: return values -class WorkbookKey(PlatformKey): +class WorkbookKey(ContainerKey): workbook_id: str -class ProjectKey(PlatformKey): +class ProjectKey(ContainerKey): project_id: str @@ -1682,7 +1682,7 @@ def emit_datasource( ) def _get_datasource_container_key(self, datasource, workbook, is_embedded_ds): - container_key: Optional[PlatformKey] = None + container_key: Optional[ContainerKey] = None if is_embedded_ds: # It is embedded then parent is container is workbook if workbook is not None: container_key = self.gen_workbook_key(workbook) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 9d82a9e247a00..ec7d00c7bcc63 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -15,8 +15,8 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( CatalogKey, + ContainerKey, MetastoreKey, - PlatformKey, UnitySchemaKey, add_dataset_to_container, gen_containers, @@ -432,7 +432,7 @@ def gen_catalog_containers(self, catalog: Catalog) -> Iterable[MetadataWorkUnit] external_url=f"{self.external_url_base}/{catalog.name}", ) - def gen_schema_key(self, schema: Schema) -> PlatformKey: + def gen_schema_key(self, schema: Schema) -> ContainerKey: return UnitySchemaKey( unity_schema=schema.name, platform=self.platform, diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py index 412ce36170048..ffa08752070dd 
100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py @@ -22,7 +22,6 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.sql.clickhouse import ClickHouseConfig from datahub.ingestion.source.usage.usage_common import ( @@ -111,9 +110,6 @@ def create(cls, config_dict, ctx): config = ClickHouseUsageConfig.parse_obj(config_dict) return cls(ctx, config) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: """Gets ClickHouse usage stats as work units""" access_events = self._get_clickhouse_history() diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/redshift_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/redshift_usage.py index 0aa4efb47ad39..ea817f40f6a2b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/redshift_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/redshift_usage.py @@ -24,7 +24,6 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.sql.redshift import RedshiftConfig from datahub.ingestion.source.usage.usage_common import ( @@ -218,9 +217,6 @@ def create(cls, config_dict: Dict, ctx: PipelineContext) -> "RedshiftUsageSource config = RedshiftUsageConfig.parse_obj(config_dict) return cls(config, ctx) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def 
get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: """Gets Redshift usage stats as work units""" engine: Engine = self._make_sql_engine() diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py index 49f64f3bff661..7dd66fd1e3d0c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py @@ -21,7 +21,6 @@ support_status, ) from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.sql.trino import TrinoConfig from datahub.ingestion.source.usage.usage_common import ( @@ -130,9 +129,6 @@ def create(cls, config_dict, ctx): config = TrinoUsageConfig.parse_obj(config_dict) return cls(ctx, config) - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - return auto_workunit_reporter(self.report, self.get_workunits_internal()) - def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: access_events = self._get_trino_history() # If the query results is empty, we don't want to proceed diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_terms.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_terms.py index 996846e8dd061..f21e3ec319349 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_terms.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_terms.py @@ -132,8 +132,9 @@ class PatternAddDatasetTerms(AddDatasetTerms): def __init__(self, config: PatternDatasetTermsConfig, ctx: PipelineContext): term_pattern = config.term_pattern generic_config = AddDatasetTermsConfig( - get_terms_to_add=lambda _: [ - GlossaryTermAssociationClass(urn=urn) for urn in term_pattern.value(_) + 
get_terms_to_add=lambda entity_urn: [ + GlossaryTermAssociationClass(urn=term_urn) + for term_urn in term_pattern.value(entity_urn) ], replace_existing=config.replace_existing, semantics=config.semantics, diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py index 5b65ba6006838..466e652277fd1 100644 --- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py +++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py @@ -14,7 +14,7 @@ from datahub.ingestion.sink.file import write_metadata_file from datahub.ingestion.source.file import read_metadata_file -from datahub.testing.mcp_diff import MCPDiff, get_aspects_by_urn +from datahub.testing.mcp_diff import CannotCompareMCPs, MCPDiff, get_aspects_by_urn logger = logging.getLogger(__name__) @@ -98,12 +98,15 @@ def diff_metadata_json( output=output_map, ignore_paths=ignore_paths, ) + except CannotCompareMCPs as e: + logger.info(f"{e}, falling back to MCE diff") except AssertionError as e: logger.warning(f"Reverting to old diff method: {e}") logger.debug("Error with new diff method", exc_info=True) - return DeepDiff( - golden, - output, - exclude_regex_paths=ignore_paths, - ignore_order=True, - ) + + return DeepDiff( + golden, + output, + exclude_regex_paths=ignore_paths, + ignore_order=True, + ) diff --git a/metadata-ingestion/src/datahub/testing/mcp_diff.py b/metadata-ingestion/src/datahub/testing/mcp_diff.py index c253e56a3a707..eb1d9371cc3c4 100644 --- a/metadata-ingestion/src/datahub/testing/mcp_diff.py +++ b/metadata-ingestion/src/datahub/testing/mcp_diff.py @@ -83,6 +83,10 @@ def give_up_diffing(self, *args: Any, **kwargs: Any) -> bool: AspectsByUrn = Dict[str, Dict[str, List[AspectForDiff]]] +class CannotCompareMCPs(Exception): + pass + + def get_aspects_by_urn(obj: object) -> AspectsByUrn: """Restructure a list of serialized MCPs by urn and aspect. 
Retains information like the original dict and index to facilitate `apply_delta` later. @@ -95,7 +99,7 @@ def get_aspects_by_urn(obj: object) -> AspectsByUrn: for i, entry in enumerate(obj): assert isinstance(entry, dict), entry if "proposedSnapshot" in entry: - raise AssertionError("Found MCEs in output") + raise CannotCompareMCPs("Found MCEs") elif "entityUrn" in entry and "aspectName" in entry and "aspect" in entry: urn = entry["entityUrn"] aspect_name = entry["aspectName"] diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 123a965e5a80a..57f93f27e9147 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -761,6 +761,54 @@ def sqlglot_lineage( default_db: Optional[str] = None, default_schema: Optional[str] = None, ) -> SqlParsingResult: + """Parse a SQL statement and generate lineage information. + + This is a schema-aware lineage generator, meaning that it will use the + schema information for the tables involved to generate lineage information + for the columns involved. The schema_resolver is responsible for providing + the table schema information. + + The parser supports most types of DML statements (SELECT, INSERT, UPDATE, + DELETE, MERGE) as well as CREATE TABLE AS SELECT (CTAS) statements. It + does not support DDL statements (CREATE TABLE, ALTER TABLE, etc.). + + The table-level lineage tends to be fairly reliable, while column-level + can be brittle with respect to missing schema information and complex + SQL logic like UNNESTs. + + The SQL dialect is inferred from the schema_resolver's platform. The + set of supported dialects is the same as sqlglot's. See their + `documentation `_ + for the full list. + + The default_db and default_schema parameters are used to resolve unqualified + table names. 
For example, the statement "SELECT * FROM my_table" would be + converted to "SELECT * FROM default_db.default_schema.my_table". + + Args: + sql: The SQL statement to parse. This should be a single statement, not + a multi-statement string. + schema_resolver: The schema resolver to use for resolving table schemas. + default_db: The default database to use for unqualified table names. + default_schema: The default schema to use for unqualified table names. + + Returns: + A SqlParsingResult object containing the parsed lineage information. + + The in_tables and out_tables fields contain the input and output tables + for the statement, respectively. These are represented as urns. + The out_tables field will be empty for SELECT statements. + + The column_lineage field contains the column-level lineage information + for the statement. This is a list of ColumnLineageInfo objects, each + representing the lineage for a single output column. The downstream + field contains the output column, and the upstreams field contains the + (urn, column) pairs for the input columns. + + The debug_info field contains debug information about the parsing. If + table_error or column_error are set, then the parsing failed and the + other fields may be incomplete. 
+ """ try: return _sqlglot_lineage_inner( sql=sql, diff --git a/metadata-ingestion/tests/integration/clickhouse/clickhouse_mces_golden.json b/metadata-ingestion/tests/integration/clickhouse/clickhouse_mces_golden.json index 67915ca6be713..6dd18f9403188 100644 --- a/metadata-ingestion/tests/integration/clickhouse/clickhouse_mces_golden.json +++ b/metadata-ingestion/tests/integration/clickhouse/clickhouse_mces_golden.json @@ -75,7 +75,12 @@ "aspectName": "browsePathsV2", "aspect": { "json": { - "path": [] + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)" + } + ] } }, "systemMetadata": { @@ -176,6 +181,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)" + }, { "id": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c", "urn": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c" @@ -246,11 +255,11 @@ "primary_key": "col_Int64", "sampling_key": "", "storage_policy": "default", - "metadata_modification_time": "2023-07-03 18:52:30", + "metadata_modification_time": "2023-07-24 21:34:01", "total_rows": "10", "total_bytes": "671", - "data_paths": "['/var/lib/clickhouse/store/2d4/2d43771f-2a9f-4a28-962e-992a7c08102f/']", - "metadata_path": "/var/lib/clickhouse/store/e2e/e2e3221c-2ffa-4009-b71e-ad306b778310/mv_target_table.sql" + "data_paths": "['/var/lib/clickhouse/store/6cb/6cbac4d1-c700-4f8a-9cc9-542cc349e497/']", + "metadata_path": "/var/lib/clickhouse/store/0a2/0a2bd3dd-893f-4f9a-b310-92e4c830091a/mv_target_table.sql" }, "name": "mv_target_table", "description": "This is target table for materialized view", @@ -394,6 +403,10 @@ "aspect": { "json": { "path": [ + { + "id": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)" + }, { "id": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c", "urn": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c" @@ -444,11 +457,11 @@ "primary_key": "", "sampling_key": "", "storage_policy": "default", - "metadata_modification_time": "2023-07-03 18:52:30", + "metadata_modification_time": "2023-07-24 21:34:01", "total_rows": "0", "total_bytes": "0", - "data_paths": "['/var/lib/clickhouse/store/700/70013972-f4ad-4c8b-a4bd-b397c9cc727f/']", - "metadata_path": "/var/lib/clickhouse/store/e2e/e2e3221c-2ffa-4009-b71e-ad306b778310/test_data_types.sql" + "data_paths": "['/var/lib/clickhouse/store/339/339ddf61-6dc4-47ae-9ae5-a358864e6457/']", + "metadata_path": "/var/lib/clickhouse/store/0a2/0a2bd3dd-893f-4f9a-b310-92e4c830091a/test_data_types.sql" }, "name": "test_data_types", "description": "This table has basic types", @@ -1021,6 +1034,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)" + }, { "id": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c", "urn": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c" @@ -1095,11 +1112,11 @@ "primary_key": "", "sampling_key": "", "storage_policy": "", - "metadata_modification_time": "2023-07-03 18:52:30", + "metadata_modification_time": "2023-07-24 21:34:01", "total_rows": "None", "total_bytes": "None", "data_paths": "[]", - "metadata_path": "/var/lib/clickhouse/store/e2e/e2e3221c-2ffa-4009-b71e-ad306b778310/test_dict.sql" + "metadata_path": "/var/lib/clickhouse/store/0a2/0a2bd3dd-893f-4f9a-b310-92e4c830091a/test_dict.sql" }, "name": "test_dict", "description": "", @@ -1204,6 +1221,10 @@ "aspect": { "json": { "path": [ + { + "id": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)" + }, { "id": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c", "urn": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c" @@ -1254,11 +1275,11 @@ "primary_key": "", "sampling_key": "", "storage_policy": "default", - "metadata_modification_time": "2023-07-03 18:52:30", + "metadata_modification_time": "2023-07-24 21:34:01", "total_rows": "0", "total_bytes": "0", - "data_paths": "['/var/lib/clickhouse/store/04c/04c9735e-af23-4850-a034-8da8f03d75af/']", - "metadata_path": "/var/lib/clickhouse/store/e2e/e2e3221c-2ffa-4009-b71e-ad306b778310/test_nested_data_types.sql" + "data_paths": "['/var/lib/clickhouse/store/22c/22c46b00-4f2a-444a-8bee-73e60b9deba6/']", + "metadata_path": "/var/lib/clickhouse/store/0a2/0a2bd3dd-893f-4f9a-b310-92e4c830091a/test_nested_data_types.sql" }, "name": "test_nested_data_types", "description": "This table has nested types", @@ -1467,6 +1488,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)" + }, { "id": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c", "urn": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c" @@ -1541,11 +1566,11 @@ "primary_key": "", "sampling_key": "", "storage_policy": "", - "metadata_modification_time": "2023-07-03 18:52:30", + "metadata_modification_time": "2023-07-24 21:34:01", "total_rows": "None", "total_bytes": "None", - "data_paths": "['/var/lib/clickhouse/store/2d4/2d43771f-2a9f-4a28-962e-992a7c08102f/']", - "metadata_path": "/var/lib/clickhouse/store/e2e/e2e3221c-2ffa-4009-b71e-ad306b778310/mv_with_target_table.sql", + "data_paths": "['/var/lib/clickhouse/store/6cb/6cbac4d1-c700-4f8a-9cc9-542cc349e497/']", + "metadata_path": 
"/var/lib/clickhouse/store/0a2/0a2bd3dd-893f-4f9a-b310-92e4c830091a/mv_with_target_table.sql", "view_definition": "", "is_view": "True" }, @@ -1708,6 +1733,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)" + }, { "id": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c", "urn": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c" @@ -1782,11 +1811,11 @@ "primary_key": "", "sampling_key": "", "storage_policy": "", - "metadata_modification_time": "2023-07-03 18:52:30", + "metadata_modification_time": "2023-07-24 21:34:01", "total_rows": "0", "total_bytes": "0", - "data_paths": "['/var/lib/clickhouse/store/649/64934dd8-0347-4e30-a604-8b27022dc799/']", - "metadata_path": "/var/lib/clickhouse/store/e2e/e2e3221c-2ffa-4009-b71e-ad306b778310/mv_without_target_table.sql", + "data_paths": "['/var/lib/clickhouse/store/0f1/0f172bf3-80e5-4ba7-9ae3-938da0a9799d/']", + "metadata_path": "/var/lib/clickhouse/store/0a2/0a2bd3dd-893f-4f9a-b310-92e4c830091a/mv_without_target_table.sql", "view_definition": "", "is_view": "True" }, @@ -1949,6 +1978,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)" + }, { "id": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c", "urn": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c" @@ -2023,11 +2056,11 @@ "primary_key": "", "sampling_key": "", "storage_policy": "", - "metadata_modification_time": "2023-07-03 18:52:30", + "metadata_modification_time": "2023-07-24 21:34:01", "total_rows": "None", "total_bytes": "None", "data_paths": "[]", - "metadata_path": "/var/lib/clickhouse/store/e2e/e2e3221c-2ffa-4009-b71e-ad306b778310/test_view.sql", + "metadata_path": 
"/var/lib/clickhouse/store/0a2/0a2bd3dd-893f-4f9a-b310-92e4c830091a/test_view.sql", "view_definition": "", "is_view": "True" }, @@ -2138,6 +2171,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:clickhouse,clickhousetestserver)" + }, { "id": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c", "urn": "urn:li:container:ab016b94aa0d75c5b9205c33260e989c" diff --git a/metadata-ingestion/tests/integration/csv-enricher/csv_enricher_golden.json b/metadata-ingestion/tests/integration/csv-enricher/csv_enricher_golden.json index fa2e0cf6570b5..d56eb50843dd0 100644 --- a/metadata-ingestion/tests/integration/csv-enricher/csv_enricher_golden.json +++ b/metadata-ingestion/tests/integration/csv-enricher/csv_enricher_golden.json @@ -1,932 +1,1142 @@ [ - { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", - "changeType": "UPSERT", - "aspectName": "glossaryTerms", - "aspect": { - "json": { - "terms": [ - { - "urn": "urn:li:glossaryTerm:CustomerAccount" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:CustomerAccount" } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", - "changeType": "UPSERT", - "aspectName": "globalTags", - "aspect": { - "json": { - "tags": [ - { - "tag": "urn:li:tag:Legacy" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 
1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "TECHNICAL_OWNER" - }, - { - "owner": "urn:li:corpuser:jdoe", - "type": "TECHNICAL_OWNER" - } - ], - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" } - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", - "changeType": "UPSERT", - "aspectName": "domains", - "aspect": { - "json": { - "domains": [ - "urn:li:domain:Engineering" - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", - "changeType": "UPSERT", - "aspectName": "editableDatasetProperties", - "aspect": { - "json": { - "created": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" - }, - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + 
"owner": "urn:li:corpuser:datahub", + "type": "TECHNICAL_OWNER" }, - "description": "new description" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "container", - "entityUrn": "urn:li:container:DATABASE", - "changeType": "UPSERT", - "aspectName": "glossaryTerms", - "aspect": { - "json": { - "terms": [ - { - "urn": "urn:li:glossaryTerm:CustomerAccount" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + { + "owner": "urn:li:corpuser:jdoe", + "type": "TECHNICAL_OWNER" } + ], + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "container", - "entityUrn": "urn:li:container:DATABASE", - "changeType": "UPSERT", - "aspectName": "globalTags", - "aspect": { - "json": { - "tags": [ - { - "tag": "urn:li:tag:Legacy" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "container", - "entityUrn": "urn:li:container:DATABASE", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "TECHNICAL_OWNER" - }, - { - "owner": "urn:li:corpuser:jdoe", - "type": "TECHNICAL_OWNER" - } - ], - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "changeType": "UPSERT", + "aspectName": "domains", + "aspect": { + "json": { + "domains": [ + "urn:li:domain:Engineering" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "changeType": "UPSERT", + "aspectName": "editableDatasetProperties", + "aspect": { + "json": { + "created": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" + }, + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" + }, + "description": "new description" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:DATABASE", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:CustomerAccount" } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "chart", - "entityUrn": "urn:li:chart:(looker,baz1)", - "changeType": "UPSERT", - "aspectName": "glossaryTerms", - "aspect": { - "json": { - "terms": [ - { - "urn": "urn:li:glossaryTerm:CustomerAccount" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:DATABASE", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" } - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "chart", - "entityUrn": "urn:li:chart:(looker,baz1)", - "changeType": "UPSERT", - "aspectName": "globalTags", - "aspect": { - "json": { - "tags": [ - { - "tag": "urn:li:tag:Legacy" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "chart", - "entityUrn": 
"urn:li:chart:(looker,baz1)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "TECHNICAL_OWNER" - }, - { - "owner": "urn:li:corpuser:jdoe", - "type": "TECHNICAL_OWNER" - } - ], - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:DATABASE", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:jdoe", + "type": "TECHNICAL_OWNER" } + ], + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,baz)", - "changeType": "UPSERT", - "aspectName": "glossaryTerms", - "aspect": { - "json": { - "terms": [ - { - "urn": "urn:li:glossaryTerm:CustomerAccount" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:DATABASE", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,baz1)", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:CustomerAccount" } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - 
"systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,baz)", - "changeType": "UPSERT", - "aspectName": "globalTags", - "aspect": { - "json": { - "tags": [ - { - "tag": "urn:li:tag:Legacy" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,baz)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "TECHNICAL_OWNER" - }, - { - "owner": "urn:li:corpuser:jdoe", - "type": "TECHNICAL_OWNER" - } - ], - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,baz1)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" } - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlFeature", - "entityUrn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_LIST_feature)", - "changeType": "UPSERT", - "aspectName": "glossaryTerms", - "aspect": { - "json": { - "terms": [ - { - "urn": "urn:li:glossaryTerm:CustomerAccount" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,baz1)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:jdoe", + 
"type": "TECHNICAL_OWNER" } + ], + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlFeature", - "entityUrn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_LIST_feature)", - "changeType": "UPSERT", - "aspectName": "globalTags", - "aspect": { - "json": { - "tags": [ - { - "tag": "urn:li:tag:Legacy" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlFeature", - "entityUrn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_LIST_feature)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "TECHNICAL_OWNER" - }, - { - "owner": "urn:li:corpuser:jdoe", - "type": "TECHNICAL_OWNER" - } - ], - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,baz)", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:CustomerAccount" } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlFeatureTable", - "entityUrn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,user_features)", - "changeType": "UPSERT", - "aspectName": "glossaryTerms", - "aspect": { - "json": { - "terms": [ - { - "urn": "urn:li:glossaryTerm:CustomerAccount" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + 
"runId": "test-csv-enricher" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,baz)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" } - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlFeatureTable", - "entityUrn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,user_features)", - "changeType": "UPSERT", - "aspectName": "globalTags", - "aspect": { - "json": { - "tags": [ - { - "tag": "urn:li:tag:Legacy" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlFeatureTable", - "entityUrn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,user_features)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "TECHNICAL_OWNER" - }, - { - "owner": "urn:li:corpuser:jdoe", - "type": "TECHNICAL_OWNER" - } - ], - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,baz)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:jdoe", + "type": "TECHNICAL_OWNER" } + ], + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlPrimaryKey", - "entityUrn": "urn:li:mlPrimaryKey:(test_feature_table_all_feature_dtypes,dummy_entity_1)", - "changeType": "UPSERT", - "aspectName": "glossaryTerms", - "aspect": { - "json": { - 
"terms": [ - { - "urn": "urn:li:glossaryTerm:CustomerAccount" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlFeature", + "entityUrn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_LIST_feature)", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:CustomerAccount" } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlPrimaryKey", - "entityUrn": "urn:li:mlPrimaryKey:(test_feature_table_all_feature_dtypes,dummy_entity_1)", - "changeType": "UPSERT", - "aspectName": "globalTags", - "aspect": { - "json": { - "tags": [ - { - "tag": "urn:li:tag:Legacy" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlPrimaryKey", - "entityUrn": "urn:li:mlPrimaryKey:(test_feature_table_all_feature_dtypes,dummy_entity_1)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "TECHNICAL_OWNER" - }, - { - "owner": "urn:li:corpuser:jdoe", - "type": "TECHNICAL_OWNER" - } - ], - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlFeature", + "entityUrn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_LIST_feature)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" } - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": 
"test-csv-enricher" - } - }, - { - "entityType": "mlModel", - "entityUrn": "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)", - "changeType": "UPSERT", - "aspectName": "glossaryTerms", - "aspect": { - "json": { - "terms": [ - { - "urn": "urn:li:glossaryTerm:CustomerAccount" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlFeature", + "entityUrn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_LIST_feature)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:jdoe", + "type": "TECHNICAL_OWNER" } + ], + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlModel", - "entityUrn": "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)", - "changeType": "UPSERT", - "aspectName": "globalTags", - "aspect": { - "json": { - "tags": [ - { - "tag": "urn:li:tag:Legacy" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlModel", - "entityUrn": "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "TECHNICAL_OWNER" - }, - { - "owner": "urn:li:corpuser:jdoe", - "type": "TECHNICAL_OWNER" - } - ], - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlFeatureTable", + "entityUrn": 
"urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,user_features)", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:CustomerAccount" } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlModelGroup", - "entityUrn": "urn:li:mlModelGroup:(urn:li:dataPlatform:science,scienceGroup,PROD)", - "changeType": "UPSERT", - "aspectName": "glossaryTerms", - "aspect": { - "json": { - "terms": [ - { - "urn": "urn:li:glossaryTerm:CustomerAccount" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlFeatureTable", + "entityUrn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,user_features)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlFeatureTable", + "entityUrn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,user_features)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:jdoe", + "type": "TECHNICAL_OWNER" } + ], + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlModelGroup", - "entityUrn": "urn:li:mlModelGroup:(urn:li:dataPlatform:science,scienceGroup,PROD)", - "changeType": "UPSERT", - "aspectName": "globalTags", - "aspect": { - "json": { - "tags": [ - { - 
"tag": "urn:li:tag:Legacy" - } - ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlPrimaryKey", + "entityUrn": "urn:li:mlPrimaryKey:(test_feature_table_all_feature_dtypes,dummy_entity_1)", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:CustomerAccount" + } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "mlModelGroup", - "entityUrn": "urn:li:mlModelGroup:(urn:li:dataPlatform:science,scienceGroup,PROD)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "TECHNICAL_OWNER" - }, - { - "owner": "urn:li:corpuser:jdoe", - "type": "TECHNICAL_OWNER" - } - ], - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlPrimaryKey", + "entityUrn": "urn:li:mlPrimaryKey:(test_feature_table_all_feature_dtypes,dummy_entity_1)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlPrimaryKey", + "entityUrn": "urn:li:mlPrimaryKey:(test_feature_table_all_feature_dtypes,dummy_entity_1)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:jdoe", + "type": "TECHNICAL_OWNER" + } + ], + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - 
"systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_123)", - "changeType": "UPSERT", - "aspectName": "glossaryTerms", - "aspect": { - "json": { - "terms": [ - { - "urn": "urn:li:glossaryTerm:CustomerAccount" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlModel", + "entityUrn": "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:CustomerAccount" } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_123)", - "changeType": "UPSERT", - "aspectName": "globalTags", - "aspect": { - "json": { - "tags": [ - { - "tag": "urn:li:tag:Legacy" - } - ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlModel", + "entityUrn": "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlModel", + "entityUrn": "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "TECHNICAL_OWNER" + }, + { + "owner": 
"urn:li:corpuser:jdoe", + "type": "TECHNICAL_OWNER" + } + ], + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_123)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "TECHNICAL_OWNER" - }, - { - "owner": "urn:li:corpuser:jdoe", - "type": "TECHNICAL_OWNER" - } - ], - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlModelGroup", + "entityUrn": "urn:li:mlModelGroup:(urn:li:dataPlatform:science,scienceGroup,PROD)", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:CustomerAccount" } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(airflow,dag_abc,PROD)", - "changeType": "UPSERT", - "aspectName": "glossaryTerms", - "aspect": { - "json": { - "terms": [ - { - "urn": "urn:li:glossaryTerm:CustomerAccount" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlModelGroup", + "entityUrn": "urn:li:mlModelGroup:(urn:li:dataPlatform:science,scienceGroup,PROD)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" } + ] + } + }, + "systemMetadata": { + "lastObserved": 
1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlModelGroup", + "entityUrn": "urn:li:mlModelGroup:(urn:li:dataPlatform:science,scienceGroup,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:jdoe", + "type": "TECHNICAL_OWNER" + } + ], + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(airflow,dag_abc,PROD)", - "changeType": "UPSERT", - "aspectName": "globalTags", - "aspect": { - "json": { - "tags": [ - { - "tag": "urn:li:tag:Legacy" - } - ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_123)", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:CustomerAccount" + } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(airflow,dag_abc,PROD)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "TECHNICAL_OWNER" - }, - { - "owner": "urn:li:corpuser:jdoe", - "type": "TECHNICAL_OWNER" - } - ], - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataJob", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_123)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_123)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:jdoe", + "type": "TECHNICAL_OWNER" + } + ], + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "notebook", - "entityUrn": "urn:li:notebook:(querybook,1234)", - "changeType": "UPSERT", - "aspectName": "glossaryTerms", - "aspect": { - "json": { - "terms": [ - { - "urn": "urn:li:glossaryTerm:CustomerAccount" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,dag_abc,PROD)", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:CustomerAccount" } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "notebook", - "entityUrn": "urn:li:notebook:(querybook,1234)", - "changeType": "UPSERT", - "aspectName": "globalTags", - "aspect": { - "json": { - "tags": [ - { - "tag": "urn:li:tag:Legacy" - } - ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": 
"test-csv-enricher" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,dag_abc,PROD)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,dag_abc,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:jdoe", + "type": "TECHNICAL_OWNER" + } + ], + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "notebook", - "entityUrn": "urn:li:notebook:(querybook,1234)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "TECHNICAL_OWNER" - }, - { - "owner": "urn:li:corpuser:jdoe", - "type": "TECHNICAL_OWNER" - } - ], - "lastModified": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "notebook", + "entityUrn": "urn:li:notebook:(querybook,1234)", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:CustomerAccount" } + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" - } - }, - { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", - "changeType": "UPSERT", - "aspectName": "editableSchemaMetadata", - 
"aspect": { - "json": { - "created": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "notebook", + "entityUrn": "urn:li:notebook:(querybook,1234)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "notebook", + "entityUrn": "urn:li:notebook:(querybook,1234)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "TECHNICAL_OWNER" }, - "editableSchemaFieldInfo": [ - { - "fieldPath": "field_foo", - "description": "field_foo!", - "glossaryTerms": { - "terms": [ - { - "urn": "urn:li:glossaryTerm:AccountBalance" - } - ], - "auditStamp": { - "time": 1643871600000, - "actor": "urn:li:corpuser:ingestion" + { + "owner": "urn:li:corpuser:jdoe", + "type": "TECHNICAL_OWNER" + } + ], + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "changeType": "UPSERT", + "aspectName": "editableSchemaMetadata", + "aspect": { + "json": { + "created": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "editableSchemaFieldInfo": [ + { + "fieldPath": "field_foo", + "description": "field_foo!", + "glossaryTerms": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:AccountBalance" } - } - }, - { - "fieldPath": "field_bar", - "description": 
"field_bar?", - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Legacy" - } - ] + ], + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" } } - ] - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "test-csv-enricher" + }, + { + "fieldPath": "field_bar", + "description": "field_bar?", + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Legacy" + } + ] + } + } + ] } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,baz1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,dag_abc,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,baz)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:DATABASE", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Legacy", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Legacy" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_123)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlFeature", + "entityUrn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_LIST_feature)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlModel", + "entityUrn": "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlPrimaryKey", + "entityUrn": "urn:li:mlPrimaryKey:(test_feature_table_all_feature_dtypes,dummy_entity_1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlFeatureTable", + "entityUrn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,user_features)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "mlModelGroup", + "entityUrn": "urn:li:mlModelGroup:(urn:li:dataPlatform:science,scienceGroup,PROD)", + 
"changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" + } +}, +{ + "entityType": "notebook", + "entityUrn": "urn:li:notebook:(querybook,1234)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher" } - ] \ No newline at end of file +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json index 58b08ee844028..52e92d27549f0 100644 --- a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json +++ b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json @@ -135,7 +135,7 @@ } }, "systemMetadata": { - "lastObserved": 1615443388097, + "lastObserved": 1672531200000, "runId": "delta-lake-test" } }, @@ -155,7 +155,7 @@ } }, "systemMetadata": { - "lastObserved": 1615443388097, + "lastObserved": 1672531200000, "runId": "delta-lake-test" } }, @@ -170,7 +170,7 @@ } }, "systemMetadata": { - "lastObserved": 1615443388097, + "lastObserved": 1672531200000, "runId": "delta-lake-test" } }, @@ -185,7 +185,7 @@ } }, "systemMetadata": { - "lastObserved": 1615443388097, + "lastObserved": 1672531200000, "runId": "delta-lake-test" } }, @@ -202,7 +202,22 @@ } }, "systemMetadata": { - "lastObserved": 1615443388097, + "lastObserved": 1672531200000, + "runId": "delta-lake-test" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:34fc0473e206bb1f4307aadf4177b2fd", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1672531200000, "runId": "delta-lake-test" } }, @@ -222,7 +237,7 @@ } }, "systemMetadata": { - 
"lastObserved": 1615443388097, + "lastObserved": 1672531200000, "runId": "delta-lake-test" } }, @@ -237,7 +252,7 @@ } }, "systemMetadata": { - "lastObserved": 1615443388097, + "lastObserved": 1672531200000, "runId": "delta-lake-test" } }, @@ -252,7 +267,7 @@ } }, "systemMetadata": { - "lastObserved": 1615443388097, + "lastObserved": 1672531200000, "runId": "delta-lake-test" } }, @@ -269,7 +284,7 @@ } }, "systemMetadata": { - "lastObserved": 1615443388097, + "lastObserved": 1672531200000, "runId": "delta-lake-test" } }, @@ -284,7 +299,27 @@ } }, "systemMetadata": { - "lastObserved": 1615443388097, + "lastObserved": 1672531200000, + "runId": "delta-lake-test" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:acebf8bcf966274632d3d2b710ef4947", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:34fc0473e206bb1f4307aadf4177b2fd", + "urn": "urn:li:container:34fc0473e206bb1f4307aadf4177b2fd" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1672531200000, "runId": "delta-lake-test" } }, @@ -299,7 +334,7 @@ } }, "systemMetadata": { - "lastObserved": 1615443388097, + "lastObserved": 1672531200000, "runId": "delta-lake-test" } }, @@ -310,7 +345,7 @@ "aspectName": "operation", "aspect": { "json": { - "timestampMillis": 1615443388097, + "timestampMillis": 1672531200000, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -326,7 +361,31 @@ } }, "systemMetadata": { - "lastObserved": 1615443388097, + "lastObserved": 1672531200000, + "runId": "delta-lake-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-test-bucket/delta_tables/sales,DEV)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:34fc0473e206bb1f4307aadf4177b2fd", + "urn": "urn:li:container:34fc0473e206bb1f4307aadf4177b2fd" + }, + { + "id": 
"urn:li:container:acebf8bcf966274632d3d2b710ef4947", + "urn": "urn:li:container:acebf8bcf966274632d3d2b710ef4947" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1672531200000, "runId": "delta-lake-test" } } diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json index 8790f87c679bb..4dcdf71ce0095 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json @@ -166,6 +166,26 @@ "runId": "allow_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:189046201d696e7810132cfa64dad337", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:acf0f3806f475a7397ee745329ef2967", @@ -250,6 +270,30 @@ "runId": "allow_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:acf0f3806f475a7397ee745329ef2967", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" + }, + { + "id": "urn:li:container:189046201d696e7810132cfa64dad337", + "urn": "urn:li:container:189046201d696e7810132cfa64dad337" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } +}, { "entityType": "container", 
"entityUrn": "urn:li:container:1876d057d0ee364677b85427342e2c82", @@ -334,6 +378,34 @@ "runId": "allow_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1876d057d0ee364677b85427342e2c82", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" + }, + { + "id": "urn:li:container:189046201d696e7810132cfa64dad337", + "urn": "urn:li:container:189046201d696e7810132cfa64dad337" + }, + { + "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", + "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", @@ -418,6 +490,38 @@ "runId": "allow_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" + }, + { + "id": "urn:li:container:189046201d696e7810132cfa64dad337", + "urn": "urn:li:container:189046201d696e7810132cfa64dad337" + }, + { + "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", + "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" + }, + { + "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", + "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:a282913be26fceff334523c2be119df1", @@ -502,6 +606,42 @@ 
"runId": "allow_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a282913be26fceff334523c2be119df1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" + }, + { + "id": "urn:li:container:189046201d696e7810132cfa64dad337", + "urn": "urn:li:container:189046201d696e7810132cfa64dad337" + }, + { + "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", + "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" + }, + { + "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", + "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" + }, + { + "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", + "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", @@ -656,6 +796,46 @@ "runId": "allow_table.json" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" + }, + { + "id": "urn:li:container:189046201d696e7810132cfa64dad337", + "urn": "urn:li:container:189046201d696e7810132cfa64dad337" + }, + { + "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", + "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" + }, + { + "id": 
"urn:li:container:1876d057d0ee364677b85427342e2c82", + "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" + }, + { + "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", + "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" + }, + { + "id": "urn:li:container:a282913be26fceff334523c2be119df1", + "urn": "urn:li:container:a282913be26fceff334523c2be119df1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -838,6 +1018,46 @@ "runId": "allow_table.json" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" + }, + { + "id": "urn:li:container:189046201d696e7810132cfa64dad337", + "urn": "urn:li:container:189046201d696e7810132cfa64dad337" + }, + { + "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", + "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" + }, + { + "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", + "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" + }, + { + "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", + "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" + }, + { + "id": "urn:li:container:a282913be26fceff334523c2be119df1", + "urn": "urn:li:container:a282913be26fceff334523c2be119df1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1089,6 +1309,46 @@ "runId": "allow_table.json" } }, +{ + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/my_table_no_name,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" + }, + { + "id": "urn:li:container:189046201d696e7810132cfa64dad337", + "urn": "urn:li:container:189046201d696e7810132cfa64dad337" + }, + { + "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", + "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" + }, + { + "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", + "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" + }, + { + "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", + "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" + }, + { + "id": "urn:li:container:a282913be26fceff334523c2be119df1", + "urn": "urn:li:container:a282913be26fceff334523c2be119df1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1271,6 +1531,46 @@ "runId": "allow_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:3df8f6b0f3a70d42cf70612a2fe5e5ef", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" + }, + { + "id": "urn:li:container:189046201d696e7810132cfa64dad337", + "urn": "urn:li:container:189046201d696e7810132cfa64dad337" + }, + { + "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", + "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" + }, + { + "id": 
"urn:li:container:1876d057d0ee364677b85427342e2c82", + "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" + }, + { + "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", + "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" + }, + { + "id": "urn:li:container:a282913be26fceff334523c2be119df1", + "urn": "urn:li:container:a282913be26fceff334523c2be119df1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/level1/my_table_inner,UAT)", @@ -1424,5 +1724,49 @@ "lastObserved": 1615443388097, "runId": "allow_table.json" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/level1/my_table_inner,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" + }, + { + "id": "urn:li:container:189046201d696e7810132cfa64dad337", + "urn": "urn:li:container:189046201d696e7810132cfa64dad337" + }, + { + "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", + "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" + }, + { + "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", + "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" + }, + { + "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", + "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" + }, + { + "id": "urn:li:container:a282913be26fceff334523c2be119df1", + "urn": "urn:li:container:a282913be26fceff334523c2be119df1" + }, + { + "id": "urn:li:container:3df8f6b0f3a70d42cf70612a2fe5e5ef", + "urn": "urn:li:container:3df8f6b0f3a70d42cf70612a2fe5e5ef" + } 
+ ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json index 34d5244a42990..901e4c1262d3f 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json @@ -164,6 +164,21 @@ "runId": "inner_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:974a39dc631803eddedc699cc9bb9759", @@ -246,6 +261,26 @@ "runId": "inner_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", @@ -328,6 +363,30 @@ "runId": "inner_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + }, 
+ { + "id": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "urn": "urn:li:container:974a39dc631803eddedc699cc9bb9759" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4", @@ -410,6 +469,34 @@ "runId": "inner_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + }, + { + "id": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "urn": "urn:li:container:974a39dc631803eddedc699cc9bb9759" + }, + { + "id": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", + "urn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:ad4b596846e8e010114b1ec82b324fab", @@ -492,6 +579,38 @@ "runId": "inner_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ad4b596846e8e010114b1ec82b324fab", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + }, + { + "id": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "urn": "urn:li:container:974a39dc631803eddedc699cc9bb9759" + }, + { + "id": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", + "urn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6" + }, + { + "id": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4", + "urn": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4" + } + ] + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "inner_table.json" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", @@ -646,6 +765,42 @@ "runId": "inner_table.json" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + }, + { + "id": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "urn": "urn:li:container:974a39dc631803eddedc699cc9bb9759" + }, + { + "id": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", + "urn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6" + }, + { + "id": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4", + "urn": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4" + }, + { + "id": "urn:li:container:ad4b596846e8e010114b1ec82b324fab", + "urn": "urn:li:container:ad4b596846e8e010114b1ec82b324fab" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -828,6 +983,42 @@ "runId": "inner_table.json" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + }, + { + "id": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "urn": "urn:li:container:974a39dc631803eddedc699cc9bb9759" + }, + { + "id": 
"urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", + "urn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6" + }, + { + "id": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4", + "urn": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4" + }, + { + "id": "urn:li:container:ad4b596846e8e010114b1ec82b324fab", + "urn": "urn:li:container:ad4b596846e8e010114b1ec82b324fab" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1079,6 +1270,42 @@ "runId": "inner_table.json" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_no_name,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + }, + { + "id": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "urn": "urn:li:container:974a39dc631803eddedc699cc9bb9759" + }, + { + "id": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", + "urn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6" + }, + { + "id": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4", + "urn": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4" + }, + { + "id": "urn:li:container:ad4b596846e8e010114b1ec82b324fab", + "urn": "urn:li:container:ad4b596846e8e010114b1ec82b324fab" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1259,6 +1486,42 @@ "runId": "inner_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:6bb6dc6de93177210067d00b45b481bb", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ 
+ { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + }, + { + "id": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "urn": "urn:li:container:974a39dc631803eddedc699cc9bb9759" + }, + { + "id": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", + "urn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6" + }, + { + "id": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4", + "urn": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4" + }, + { + "id": "urn:li:container:ad4b596846e8e010114b1ec82b324fab", + "urn": "urn:li:container:ad4b596846e8e010114b1ec82b324fab" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/level1/my_table_inner,UAT)", @@ -1412,5 +1675,45 @@ "lastObserved": 1615443388097, "runId": "inner_table.json" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/level1/my_table_inner,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + }, + { + "id": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "urn": "urn:li:container:974a39dc631803eddedc699cc9bb9759" + }, + { + "id": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", + "urn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6" + }, + { + "id": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4", + "urn": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4" + }, + { + "id": "urn:li:container:ad4b596846e8e010114b1ec82b324fab", + "urn": "urn:li:container:ad4b596846e8e010114b1ec82b324fab" + }, + { + "id": 
"urn:li:container:6bb6dc6de93177210067d00b45b481bb", + "urn": "urn:li:container:6bb6dc6de93177210067d00b45b481bb" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json index b2ed4316111d0..18474e819334e 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json @@ -164,6 +164,21 @@ "runId": "relative_path.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:85267d161e1a2ffa647cec6c1188549f", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "relative_path.json" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,delta_tables/my_table_basic,UAT)", @@ -317,5 +332,25 @@ "lastObserved": 1615443388097, "runId": "relative_path.json" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,delta_tables/my_table_basic,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:85267d161e1a2ffa647cec6c1188549f", + "urn": "urn:li:container:85267d161e1a2ffa647cec6c1188549f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "relative_path.json" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json index 
afdc3d9360a48..bb47a077e878b 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json @@ -163,6 +163,21 @@ "runId": "single_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_table.json" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:974a39dc631803eddedc699cc9bb9759", @@ -245,6 +260,26 @@ "runId": "single_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_table.json" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", @@ -327,6 +362,30 @@ "runId": "single_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + }, + { + "id": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "urn": "urn:li:container:974a39dc631803eddedc699cc9bb9759" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_table.json" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4", @@ 
-409,6 +468,34 @@ "runId": "single_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + }, + { + "id": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "urn": "urn:li:container:974a39dc631803eddedc699cc9bb9759" + }, + { + "id": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", + "urn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_table.json" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:ad4b596846e8e010114b1ec82b324fab", @@ -491,6 +578,38 @@ "runId": "single_table.json" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ad4b596846e8e010114b1ec82b324fab", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + }, + { + "id": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "urn": "urn:li:container:974a39dc631803eddedc699cc9bb9759" + }, + { + "id": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", + "urn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6" + }, + { + "id": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4", + "urn": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_table.json" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", @@ -533,5 +652,41 @@ "lastObserved": 1615443388097, "runId": "single_table.json" } +}, +{ + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf", + "urn": "urn:li:container:bdfaaacd66870755e65612e0b88dd4bf" + }, + { + "id": "urn:li:container:974a39dc631803eddedc699cc9bb9759", + "urn": "urn:li:container:974a39dc631803eddedc699cc9bb9759" + }, + { + "id": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6", + "urn": "urn:li:container:dae543a1ed7ecfea4079a971dc7805a6" + }, + { + "id": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4", + "urn": "urn:li:container:ee050cda8eca59687021c24cbc0bb8a4" + }, + { + "id": "urn:li:container:ad4b596846e8e010114b1ec82b324fab", + "urn": "urn:li:container:ad4b596846e8e010114b1ec82b324fab" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_table.json" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/feast/feast_repository_mces_golden.json b/metadata-ingestion/tests/integration/feast/feast_repository_mces_golden.json index d69ecda7473ea..1b91925289845 100644 --- a/metadata-ingestion/tests/integration/feast/feast_repository_mces_golden.json +++ b/metadata-ingestion/tests/integration/feast/feast_repository_mces_golden.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot": { "urn": "urn:li:mlPrimaryKey:(feature_store.driver_hourly_stats,driver_id)", @@ -14,7 +13,6 @@ "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": { "description": "Driver ID", "dataType": "ORDINAL", - "version": null, "sources": [ "urn:li:dataset:(urn:li:dataPlatform:file,data.driver_stats_with_string.parquet,PROD)" ] @@ -23,17 +21,12 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": 
"feast-repository-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "feast-repository-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { "urn": "urn:li:mlFeature:(feature_store.driver_hourly_stats,conv_rate)", @@ -47,7 +40,6 @@ "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { "description": "Conv rate", "dataType": "CONTINUOUS", - "version": null, "sources": [ "urn:li:dataset:(urn:li:dataPlatform:file,data.driver_stats_with_string.parquet,PROD)" ] @@ -56,17 +48,12 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "feast-repository-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "feast-repository-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { "urn": "urn:li:mlFeature:(feature_store.driver_hourly_stats,acc_rate)", @@ -80,7 +67,6 @@ "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { "description": "Acc rate", "dataType": "CONTINUOUS", - "version": null, "sources": [ "urn:li:dataset:(urn:li:dataPlatform:file,data.driver_stats_with_string.parquet,PROD)" ] @@ -89,17 +75,12 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "feast-repository-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "feast-repository-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { "urn": "urn:li:mlFeature:(feature_store.driver_hourly_stats,avg_daily_trips)", @@ -113,7 +94,6 @@ "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { "description": "Avg daily trips", "dataType": "ORDINAL", - "version": null, "sources": [ "urn:li:dataset:(urn:li:dataPlatform:file,data.driver_stats_with_string.parquet,PROD)" ] @@ -122,17 +102,12 @@ ] 
} }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "feast-repository-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "feast-repository-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { "urn": "urn:li:mlFeature:(feature_store.driver_hourly_stats,string_feature)", @@ -146,7 +121,6 @@ "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { "description": "String feature", "dataType": "TEXT", - "version": null, "sources": [ "urn:li:dataset:(urn:li:dataPlatform:file,data.driver_stats_with_string.parquet,PROD)" ] @@ -155,17 +129,12 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "feast-repository-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "feast-repository-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot": { "urn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,feature_store.driver_hourly_stats)", @@ -185,7 +154,6 @@ { "com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties": { "customProperties": {}, - "description": null, "mlFeatures": [ "urn:li:mlFeature:(feature_store.driver_hourly_stats,conv_rate)", "urn:li:mlFeature:(feature_store.driver_hourly_stats,acc_rate)", @@ -200,17 +168,31 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "feast-repository-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "feast-repository-test" + } +}, +{ + "entityType": "mlFeatureTable", + "entityUrn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,feature_store.driver_hourly_stats)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "feature_store" + } + ] + } + }, + "systemMetadata": { + 
"lastObserved": 1586847600000, + "runId": "feast-repository-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { "urn": "urn:li:mlFeature:(feature_store.transformed_conv_rate,conv_rate_plus_val1)", @@ -222,9 +204,7 @@ }, { "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, "dataType": "CONTINUOUS", - "version": null, "sources": [ "urn:li:dataset:(urn:li:dataPlatform:request,vals_to_add,PROD)", "urn:li:dataset:(urn:li:dataPlatform:file,data.driver_stats_with_string.parquet,PROD)" @@ -234,17 +214,12 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "feast-repository-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "feast-repository-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { "urn": "urn:li:mlFeature:(feature_store.transformed_conv_rate,conv_rate_plus_val2)", @@ -256,9 +231,7 @@ }, { "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, "dataType": "CONTINUOUS", - "version": null, "sources": [ "urn:li:dataset:(urn:li:dataPlatform:request,vals_to_add,PROD)", "urn:li:dataset:(urn:li:dataPlatform:file,data.driver_stats_with_string.parquet,PROD)" @@ -268,17 +241,12 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "feast-repository-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "feast-repository-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot": { "urn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,feature_store.transformed_conv_rate)", @@ -298,7 +266,6 @@ { "com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties": { "customProperties": {}, - "description": null, "mlFeatures": [ 
"urn:li:mlFeature:(feature_store.transformed_conv_rate,conv_rate_plus_val1)", "urn:li:mlFeature:(feature_store.transformed_conv_rate,conv_rate_plus_val2)" @@ -309,13 +276,28 @@ ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "feast-repository-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "feast-repository-test" + } +}, +{ + "entityType": "mlFeatureTable", + "entityUrn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,feature_store.transformed_conv_rate)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "feature_store" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "feast-repository-test" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json index 3f307925ee9d5..6e57dfaae0ce0 100644 --- a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json +++ b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json @@ -1,30 +1,41 @@ [ { "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { - "urn": "urn:li:dashboard:(metabase,1)", + "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { + "urn": "urn:li:chart:(metabase,1)", "aspects": [ { - "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { - "customProperties": {}, - "title": "Dashboard 1", + "com.linkedin.pegasus2avro.chart.ChartInfo": { + "customProperties": { + "Metrics": "", + "Filters": "", + "Dimensions": "customer_id, first_name, last_name, amount, payment_date, rental_id" + }, + "title": "Customer Payment", "description": "", - "charts": [ - "urn:li:chart:(metabase,1)", - "urn:li:chart:(metabase,2)" - ], - "datasets": [], "lastModified": { "created": { - "time": 1639417721742, + "time": 1639417592792, "actor": 
"urn:li:corpuser:admin@metabase.com" }, "lastModified": { - "time": 1639417721742, + "time": 1639417592792, "actor": "urn:li:corpuser:admin@metabase.com" } }, - "dashboardUrl": "http://localhost:3000/dashboard/1" + "chartUrl": "http://localhost:3000/card/1", + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-data.public.payment,PROD)" + } + ], + "type": "TABLE" + } + }, + { + "com.linkedin.pegasus2avro.chart.ChartQuery": { + "rawQuery": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date", + "type": "SQL" } }, { @@ -52,40 +63,34 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { - "urn": "urn:li:chart:(metabase,1)", + "urn": "urn:li:chart:(metabase,2)", "aspects": [ { "com.linkedin.pegasus2avro.chart.ChartInfo": { "customProperties": { - "Metrics": "", - "Filters": "", - "Dimensions": "customer_id, first_name, last_name, amount, payment_date, rental_id" + "Metrics": "Count", + "Filters": "['=', ['field', 136, None], 2006]", + "Dimensions": "Rating" }, - "title": "Customer Payment", + "title": "Films, Count, Grouped by Rating, Filtered by Release Year, Sorted by [Unknown Field] descending", "description": "", "lastModified": { "created": { - "time": 1639417592792, + "time": 1636614000000, "actor": "urn:li:corpuser:admin@metabase.com" }, "lastModified": { - "time": 1639417592792, + "time": 1636614000000, "actor": "urn:li:corpuser:admin@metabase.com" } }, - "chartUrl": "http://localhost:3000/card/1", + "chartUrl": "http://localhost:3000/card/2", "inputs": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-data.public.payment,PROD)" + "string": "urn:li:dataset:(urn:li:dataPlatform:postgres,dvdrental.public.film,PROD)" } ], - "type": "TABLE" - } - }, - { - "com.linkedin.pegasus2avro.chart.ChartQuery": { - 
"rawQuery": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date", - "type": "SQL" + "type": "BAR" } }, { @@ -112,36 +117,30 @@ }, { "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { - "urn": "urn:li:chart:(metabase,2)", + "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { + "urn": "urn:li:dashboard:(metabase,1)", "aspects": [ { - "com.linkedin.pegasus2avro.chart.ChartInfo": { - "customProperties": { - "Metrics": "Count", - "Filters": "['=', ['field', 136, None], 2006]", - "Dimensions": "Rating" - }, - "title": "Films, Count, Grouped by Rating, Filtered by Release Year, Sorted by [Unknown Field] descending", + "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { + "customProperties": {}, + "title": "Dashboard 1", "description": "", + "charts": [ + "urn:li:chart:(metabase,1)", + "urn:li:chart:(metabase,2)" + ], + "datasets": [], "lastModified": { "created": { - "time": 1639417717110, + "time": 1639417721742, "actor": "urn:li:corpuser:admin@metabase.com" }, "lastModified": { - "time": 1639417717110, + "time": 1639417721742, "actor": "urn:li:corpuser:admin@metabase.com" - }, - "deleted": null - }, - "chartUrl": "http://localhost:3000/card/2", - "inputs": [ - { - "string": "urn:li:dataset:(urn:li:dataPlatform:postgres,dvdrental.public.film,PROD)" } - ], - "type": "BAR" + }, + "dashboardUrl": "http://localhost:3000/dashboard/1" } }, { @@ -165,5 +164,50 @@ "lastObserved": 1636614000000, "runId": "metabase-test" } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(metabase,1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1636614000000, + "runId": "metabase-test" + } +}, +{ + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(metabase,2)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1636614000000, + "runId": "metabase-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(metabase,1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1636614000000, + "runId": "metabase-test" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json index 003a74ed0a6d1..62e14520e53e8 100644 --- a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json +++ b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { "urn": "urn:li:dashboard:(mode,2934237)", @@ -8,7 +7,6 @@ { "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { "customProperties": {}, - "externalUrl": null, "title": "Report 1", "description": "First Report", "charts": [ @@ -18,19 +16,14 @@ "lastModified": { "created": { "time": 1639169724316, - "actor": "urn:li:corpuser:modeuser", - "impersonator": null + "actor": "urn:li:corpuser:modeuser" }, "lastModified": { "time": 1639182684451, - "actor": "urn:li:corpuser:modeuser", - "impersonator": null - }, - "deleted": null + "actor": "urn:li:corpuser:modeuser" + } }, - "dashboardUrl": "https://app.mode.com/acryl/reports/9d2da37fa91e", - "access": null, - "lastRefreshed": null + "dashboardUrl": "https://app.mode.com/acryl/reports/9d2da37fa91e" } }, { @@ -45,31 +38,49 @@ "owners": [ { "owner": "urn:li:corpuser:modeuser", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": 
"urn:li:corpuser:unknown" } } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1638860400000, - "runId": "mode-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "mode-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(mode,2934237)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "acryl" + }, + { + "id": "AcrylTest" + }, + { + "id": "Report 1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "mode-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { "urn": "urn:li:chart:(mode,f622b9ee725b)", @@ -80,37 +91,31 @@ "Columns": "payment_date,Staff First Name,amount,Staff Last Name", "Filters": "amount" }, - "externalUrl": null, "title": "Customer Staff Table", "description": "", "lastModified": { "created": { "time": 1639170083088, - "actor": "urn:li:corpuser:modeuser", - "impersonator": null + "actor": "urn:li:corpuser:modeuser" }, "lastModified": { "time": 1639182684438, - "actor": "urn:li:corpuser:modeuser", - "impersonator": null - }, - "deleted": null + "actor": "urn:li:corpuser:modeuser" + } }, "chartUrl": "https://app.mode.com/acryltest/reports/9d2da37fa91e/viz/f622b9ee725b", "inputs": [ { - "string": "urn:li:dataset:(urn:li:dataPlatform:postgres,dvdrental.public.rental,PROD)" + "string": "urn:li:dataset:(urn:li:dataPlatform:postgres,dvdrental.public.staff,PROD)" }, { "string": "urn:li:dataset:(urn:li:dataPlatform:postgres,dvdrental.public.payment,PROD)" }, { - "string": "urn:li:dataset:(urn:li:dataPlatform:postgres,dvdrental.public.staff,PROD)" + "string": "urn:li:dataset:(urn:li:dataPlatform:postgres,dvdrental.public.rental,PROD)" } ], - "type": "TABLE", - "access": null, - "lastRefreshed": null + "type": "TABLE" } }, { @@ -131,27 +136,82 @@ "owners": [ { "owner": "urn:li:corpuser:modeuser", - "type": 
"DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" } } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1638860400000, - "runId": "mode-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "mode-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(mode,f622b9ee725b)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "mode-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(mode,f622b9ee725b)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "acryl" + }, + { + "id": "AcrylTest" + }, + { + "id": "Report 1" + }, + { + "id": "Customer and staff" + }, + { + "id": "Customer Staff Table" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "mode-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(mode,2934237)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "mode-test" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json index 7d69ee10196f6..1f662cfe514e2 100644 --- a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json +++ b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json @@ -4132,5 +4132,65 @@ "lastObserved": 1615443388097, "runId": "mongodb-test" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": 
"status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mysql/mysql_table_row_count_estimate_only.json b/metadata-ingestion/tests/integration/mysql/mysql_table_row_count_estimate_only.json index e668525b930af..7597013bd873a 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_table_row_count_estimate_only.json +++ b/metadata-ingestion/tests/integration/mysql/mysql_table_row_count_estimate_only.json @@ -400,5 +400,159 @@ "lastObserved": 1586847600000, "runId": "mysql-2020_04_14-07_00_00" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1586847600000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "rowCount": 0, + "columnCount": 6, + 
"fieldProfiles": [ + { + "fieldPath": "id", + "uniqueCount": 5, + "uniqueProportion": 1, + "nullCount": 0, + "min": "1", + "max": "5", + "mean": "3.0", + "median": "3", + "stdev": "1.5811388300841898", + "sampleValues": [ + "1", + "2", + "3", + "4", + "5" + ] + }, + { + "fieldPath": "company", + "uniqueCount": 5, + "uniqueProportion": 1, + "nullCount": 0, + "sampleValues": [ + "Company A", + "Company B", + "Company C", + "Company D", + "Company E" + ] + }, + { + "fieldPath": "last_name", + "uniqueCount": 5, + "uniqueProportion": 1, + "nullCount": 0, + "sampleValues": [ + "Axen", + "Bedecs", + "Donnell", + "Gratacos Solsona", + "Lee" + ] + }, + { + "fieldPath": "first_name", + "uniqueCount": 5, + "uniqueProportion": 1, + "nullCount": 0, + "sampleValues": [ + "Anna", + "Antonio", + "Christina", + "Martin", + "Thomas" + ] + }, + { + "fieldPath": "email_address", + "uniqueCount": 0, + "nullCount": 0, + "sampleValues": [] + }, + { + "fieldPath": "priority", + "uniqueCount": 3, + "uniqueProportion": 0.75, + "nullCount": 0, + "min": "3.8", + "max": "4.9", + "mean": "4.175000011920929", + "median": "4.0", + "stdev": "0.49244294899530355", + "sampleValues": [ + "4.0", + "4.9", + "4.0", + "3.8" + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1586847600000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "rowCount": 0, + "columnCount": 3, + "fieldProfiles": [ + { + "fieldPath": "id", + "uniqueCount": 0, + "nullCount": 0, + "min": "None", + "max": "None", + "mean": "None", + "median": "None", + "stdev": "0.0", + "sampleValues": [] + }, + { + "fieldPath": "description", + "uniqueCount": 0, + "nullCount": 0, + "sampleValues": [] + }, + { + 
"fieldPath": "customer_id", + "uniqueCount": 0, + "nullCount": 0, + "min": "None", + "max": "None", + "mean": "None", + "median": "None", + "stdev": "0.0", + "sampleValues": [] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/nifi/nifi_mces_golden_cluster.json b/metadata-ingestion/tests/integration/nifi/nifi_mces_golden_cluster.json index c38b134a97243..e0ab6bbec7b30 100644 --- a/metadata-ingestion/tests/integration/nifi/nifi_mces_golden_cluster.json +++ b/metadata-ingestion/tests/integration/nifi/nifi_mces_golden_cluster.json @@ -652,5 +652,215 @@ "lastObserved": 1638532800000, "runId": "nifi-test-cluster" } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(nifi,80820b2f-017d-1000-85cf-05f56cde9185,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(nifi,80820b2f-017d-1000-85cf-05f56cde9185,PROD),3ec2acd6-a0d4-3198-9066-a59fb757bc05)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(nifi,80820b2f-017d-1000-85cf-05f56cde9185,PROD),8eb5263d-017d-1000-ffff-ffff911b23aa)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(nifi,80820b2f-017d-1000-85cf-05f56cde9185,PROD),8a218b6e-e6a0-36b6-bc4b-79d202a80167)", + "changeType": 
"UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(nifi,80820b2f-017d-1000-85cf-05f56cde9185,PROD),71bc17ed-a3bc-339a-a100-ebad434717d4)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(nifi,80820b2f-017d-1000-85cf-05f56cde9185,PROD),c5f6fc66-ffbb-3f60-9564-f2466ae32493)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(nifi,80820b2f-017d-1000-85cf-05f56cde9185,PROD),c8c73d4c-ebdd-1bee-9b46-629672cd11a0)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(nifi,80820b2f-017d-1000-85cf-05f56cde9185,PROD),8eb55aeb-017d-1000-ffff-fffff475768d)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(nifi,80820b2f-017d-1000-85cf-05f56cde9185,PROD),fed5914b-937b-37dd-89c0-b34ffbae9cf4)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": 
"nifi-test-cluster" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,sftp_public_host,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:nifi,default.s3_data,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,sftp_public_host.temperature,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:nifi,default.sftp_files_out,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,enriched-topical-chat,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-cluster" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/nifi/nifi_mces_golden_standalone.json b/metadata-ingestion/tests/integration/nifi/nifi_mces_golden_standalone.json index 5fe65a651ee65..48a22bebb3f8e 100644 --- a/metadata-ingestion/tests/integration/nifi/nifi_mces_golden_standalone.json +++ 
b/metadata-ingestion/tests/integration/nifi/nifi_mces_golden_standalone.json @@ -227,5 +227,80 @@ "lastObserved": 1638532800000, "runId": "nifi-test-standalone" } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(nifi,803ebb92-017d-1000-2961-4bdaa27a3ba0,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-standalone" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(nifi,803ebb92-017d-1000-2961-4bdaa27a3ba0,PROD),91d59f03-1c2b-3f3f-48bc-f89296a328bd)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-standalone" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(nifi,803ebb92-017d-1000-2961-4bdaa27a3ba0,PROD),aed63edf-e660-3f29-b56b-192cf6286889)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-standalone" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(nifi,803ebb92-017d-1000-2961-4bdaa27a3ba0,PROD),cb7693ed-f93b-3340-3776-fe80e6283ddc)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-standalone" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,enriched-topical-chat,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638532800000, + "runId": "nifi-test-standalone" + } } ] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/integration/openapi/openapi_mces_golden.json b/metadata-ingestion/tests/integration/openapi/openapi_mces_golden.json index ede0006ef08d7..ad270857dd7fc 100755 --- a/metadata-ingestion/tests/integration/openapi/openapi_mces_golden.json +++ b/metadata-ingestion/tests/integration/openapi/openapi_mces_golden.json @@ -148,5 +148,35 @@ "lastObserved": 1586847600000, "runId": "openapi-2020_04_14-07_00_00" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:OpenApi,test_openapi.root,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "openapi-2020_04_14-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:OpenApi,test_openapi.v2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "openapi-2020_04_14-07_00_00" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json index 06decfe7d5b4a..fb1c42705c8e8 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_platform_instance_ingest.json @@ -778,6 +778,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)" + }, { "id": "demo-workspace" } @@ -884,6 +888,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)" + }, { "id": "demo-workspace" } @@ -1016,6 +1024,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:powerbi,aws-ap-south-1)" + }, { "id": "demo-workspace" } diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_fail_api_ingest.json b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_fail_api_ingest.json new file mode 100644 index 0000000000000..43a7080b17392 --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_fail_api_ingest.json @@ -0,0 +1,394 @@ +[ +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "TEST_USER" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testa" + ] + } + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testa", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testa" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938a" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": 
"urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "powerbi_report_server" + }, + { + "id": "server_alias" + }, + { + "id": "Reports" + }, + { + "id": "path" + }, + { + "id": "to" + }, + { + "id": "Testa" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "TEST_USER" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + 
"/powerbi_report_server/dev/server_alias/Reports/path/to/Testd" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testd", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testd" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938d" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + 
"aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "powerbi_report_server" + }, + { + "id": "server_alias" + }, + { + "id": "Reports" + }, + { + "id": "path" + }, + { + "id": "to" + }, + { + "id": "Testd" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json index 9b202baa947d5..6c36f7b0e33e4 100644 --- a/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json @@ -5,8 +5,12 @@ "changeType": "UPSERT", "aspectName": "corpUserInfo", "aspect": { - "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", - "contentType": "application/json" + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -19,8 +23,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -33,8 +38,9 @@ "changeType": "UPSERT", "aspectName": 
"corpUserKey", "aspect": { - "value": "{\"username\": \"TEST_USER\"}", - "contentType": "application/json" + "json": { + "username": "TEST_USER" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -47,8 +53,11 @@ "changeType": "UPSERT", "aspectName": "browsePaths", "aspect": { - "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testa\"]}", - "contentType": "application/json" + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testa" + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -61,8 +70,32 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testa\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testa\"}", - "contentType": "application/json" + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testa", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testa" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -75,8 +108,9 @@ "changeType": "UPSERT", "aspectName": "status", 
"aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -89,8 +123,10 @@ "changeType": "UPSERT", "aspectName": "dashboardKey", "aspect": { - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938a\"}", - "contentType": "application/json" + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938a" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -103,8 +139,22 @@ "changeType": "UPSERT", "aspectName": "ownership", "aspect": { - "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", - "contentType": "application/json" + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -112,13 +162,33 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:TEST_USER", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", "changeType": "UPSERT", - "aspectName": "corpUserInfo", + "aspectName": "browsePathsV2", "aspect": { - "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", - "contentType": "application/json" + "json": { + "path": [ + { + "id": "powerbi_report_server" + }, + { + "id": "server_alias" + }, + { + "id": "Reports" + }, + { + "id": "path" + }, + { + "id": "to" + }, + { + "id": "Testa" + } + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -129,10 +199,14 
@@ "entityType": "corpuser", "entityUrn": "urn:li:corpuser:TEST_USER", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "corpUserInfo", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -143,52 +217,11 @@ "entityType": "corpuser", "entityUrn": "urn:li:corpuser:TEST_USER", "changeType": "UPSERT", - "aspectName": "corpUserKey", - "aspect": { - "value": "{\"username\": \"TEST_USER\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", - "aspectName": "browsePaths", - "aspect": { - "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testb\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", - "aspectName": "dashboardInfo", - "aspect": { - "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testb\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testb\"}", - "contentType": "application/json" 
- }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -196,13 +229,14 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", "changeType": "UPSERT", - "aspectName": "dashboardKey", + "aspectName": "corpUserKey", "aspect": { - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938b\"}", - "contentType": "application/json" + "json": { + "username": "TEST_USER" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -210,13 +244,17 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", "changeType": "UPSERT", - "aspectName": "ownership", + "aspectName": "corpUserInfo", "aspect": { - "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", - "contentType": "application/json" + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -227,10 +265,11 @@ "entityType": "corpuser", "entityUrn": "urn:li:corpuser:TEST_USER", "changeType": "UPSERT", - "aspectName": "corpUserInfo", + "aspectName": "status", "aspect": { - "value": 
"{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -241,10 +280,11 @@ "entityType": "corpuser", "entityUrn": "urn:li:corpuser:TEST_USER", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "corpUserKey", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "username": "TEST_USER" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -252,13 +292,16 @@ } }, { - "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:TEST_USER", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938c)", "changeType": "UPSERT", - "aspectName": "corpUserKey", + "aspectName": "browsePaths", "aspect": { - "value": "{\"username\": \"TEST_USER\"}", - "contentType": "application/json" + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testc" + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -269,10 +312,34 @@ "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938c)", "changeType": "UPSERT", - "aspectName": "browsePaths", + "aspectName": "dashboardInfo", "aspect": { - "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testc\"]}", - "contentType": "application/json" + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testc", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": 
"http://host_port/Reports/powerbi/path/to/Testc" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -283,10 +350,30 @@ "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938c)", "changeType": "UPSERT", - "aspectName": "dashboardInfo", + "aspectName": "browsePathsV2", "aspect": { - "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testc\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testc\"}", - "contentType": "application/json" + "json": { + "path": [ + { + "id": "powerbi_report_server" + }, + { + "id": "server_alias" + }, + { + "id": "Reports" + }, + { + "id": "path" + }, + { + "id": "to" + }, + { + "id": "Testc" + } + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -299,8 +386,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -313,8 +401,10 @@ "changeType": "UPSERT", "aspectName": "dashboardKey", "aspect": { - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938c\"}", - "contentType": "application/json" + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938c" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -327,8 +417,22 @@ "changeType": "UPSERT", "aspectName": "ownership", 
"aspect": { - "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", - "contentType": "application/json" + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -341,8 +445,12 @@ "changeType": "UPSERT", "aspectName": "corpUserInfo", "aspect": { - "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", - "contentType": "application/json" + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -355,8 +463,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -369,8 +478,9 @@ "changeType": "UPSERT", "aspectName": "corpUserKey", "aspect": { - "value": "{\"username\": \"TEST_USER\"}", - "contentType": "application/json" + "json": { + "username": "TEST_USER" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -383,8 +493,11 @@ "changeType": "UPSERT", "aspectName": "browsePaths", "aspect": { - "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testd\"]}", - "contentType": "application/json" + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testd" + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -397,8 +510,32 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", 
\"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testd\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testd\"}", - "contentType": "application/json" + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testd", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testd" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -411,8 +548,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -425,8 +563,44 @@ "changeType": "UPSERT", "aspectName": "dashboardKey", "aspect": { - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938d\"}", - "contentType": "application/json" + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938d" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": 
"urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "powerbi_report_server" + }, + { + "id": "server_alias" + }, + { + "id": "Reports" + }, + { + "id": "path" + }, + { + "id": "to" + }, + { + "id": "Testd" + } + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -439,8 +613,22 @@ "changeType": "UPSERT", "aspectName": "ownership", "aspect": { - "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", - "contentType": "application/json" + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } }, "systemMetadata": { "lastObserved": 1643871600000, diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py b/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py index c03190be66964..826c2b77bce36 100644 --- a/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py +++ b/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py @@ -21,7 +21,7 @@ def mock_user_to_add(*args, **kwargs): return None -def register_mock_api(request_mock): +def register_mock_api(request_mock, override_mock_data={}): api_vs_response = { "https://host_port/Reports/api/v2.0/Reports": { "method": "GET", @@ -52,37 +52,6 @@ def register_mock_api(request_mock): ] }, }, - "https://host_port/Reports/api/v2.0/MobileReports": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "Id": "ee56dc21-248a-4138-a446-ee5ab1fc938b", - "Name": 
"Testb", - "Description": None, - "Path": "/path/to/Testb", - "Type": "MobileReport", - "Hidden": False, - "Size": 1010101, - "ModifiedBy": "TEST_USER", - "ModifiedDate": str(datetime.now()), - "CreatedBy": "TEST_USER", - "CreatedDate": str(datetime.now()), - "ParentFolderId": "47495172-89ab-455f-a446-fffd3cf239cb", - "IsFavorite": False, - "ContentType": None, - "Content": "", - "HasDataSources": True, - "Roles": [], - "HasSharedDataSets": True, - "HasParameters": True, - "AllowCaching": True, - "Manifest": {"Resources": []}, - }, - ] - }, - }, "https://host_port/Reports/api/v2.0/LinkedReports": { "method": "GET", "status_code": 200, @@ -141,6 +110,8 @@ def register_mock_api(request_mock): }, } + api_vs_response.update(override_mock_data) + for url in api_vs_response.keys(): request_mock.register_uri( api_vs_response[url]["method"], @@ -164,6 +135,30 @@ def default_source_config(): } +def get_default_recipe(output_path: str) -> dict: + return { + "run_id": "powerbi-report-server-test", + "source": { + "type": "powerbi-report-server", + "config": { + **default_source_config(), + }, + }, + "sink": { + "type": "file", + "config": {"filename": output_path}, # , + }, + } + + +def add_mock_method_in_pipeline(pipeline: Pipeline) -> None: + pipeline.ctx.graph = mock.MagicMock() + pipeline.ctx.graph.get_ownership = mock.MagicMock() + pipeline.ctx.graph.get_ownership.side_effect = mock_existing_users + pipeline.ctx.graph.get_aspect_v2 = mock.MagicMock() + pipeline.ctx.graph.get_aspect_v2.side_effect = mock_user_to_add + + @freeze_time(FROZEN_TIME) @mock.patch("requests_ntlm.HttpNtlmAuth") def test_powerbi_ingest(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock): @@ -174,34 +169,54 @@ def test_powerbi_ingest(mock_msal, pytestconfig, tmp_path, mock_time, requests_m register_mock_api(request_mock=requests_mock) pipeline = Pipeline.create( - { - "run_id": "powerbi-report-server-test", - "source": { - "type": "powerbi-report-server", - "config": { - 
**default_source_config(), - }, - }, - "sink": { - "type": "file", - "config": { - "filename": f"{tmp_path}/powerbi_report_server_mces.json", - }, - }, - } + get_default_recipe(output_path=f"{tmp_path}/powerbi_report_server_mces.json") ) - pipeline.ctx.graph = mock.MagicMock() - pipeline.ctx.graph.get_ownership = mock.MagicMock() - pipeline.ctx.graph.get_ownership.side_effect = mock_existing_users - pipeline.ctx.graph.get_aspect_v2 = mock.MagicMock() - pipeline.ctx.graph.get_aspect_v2.side_effect = mock_user_to_add + + add_mock_method_in_pipeline(pipeline=pipeline) + + pipeline.run() + pipeline.raise_from_status() + + golden_file = "golden_test_ingest.json" + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "powerbi_report_server_mces.json", + golden_path=f"{test_resources_dir}/{golden_file}", + ) + + +@freeze_time(FROZEN_TIME) +@mock.patch("requests_ntlm.HttpNtlmAuth") +def test_powerbi_ingest_with_failure( + mock_msal, pytestconfig, tmp_path, mock_time, requests_mock +): + test_resources_dir = ( + pytestconfig.rootpath / "tests/integration/powerbi_report_server" + ) + + register_mock_api( + request_mock=requests_mock, + override_mock_data={ + "https://host_port/Reports/api/v2.0/LinkedReports": { + "method": "GET", + "status_code": 404, + "json": {"error": "Request Failed"}, + } + }, + ) + + pipeline = Pipeline.create( + get_default_recipe(output_path=f"{tmp_path}/powerbi_report_server_mces.json") + ) + + add_mock_method_in_pipeline(pipeline=pipeline) pipeline.run() pipeline.raise_from_status() - mce_out_file = "golden_test_ingest.json" + golden_file = "golden_test_fail_api_ingest.json" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / "powerbi_report_server_mces.json", - golden_path=f"{test_resources_dir}/{mce_out_file}", + golden_path=f"{test_resources_dir}/{golden_file}", ) diff --git a/metadata-ingestion/tests/integration/remote/golden/remote_enricher_golden.json 
b/metadata-ingestion/tests/integration/remote/golden/remote_enricher_golden.json index 3a5970c1ce70b..b3a86bbd8616b 100644 --- a/metadata-ingestion/tests/integration/remote/golden/remote_enricher_golden.json +++ b/metadata-ingestion/tests/integration/remote/golden/remote_enricher_golden.json @@ -158,5 +158,35 @@ "lastObserved": 1629795600000, "runId": "remote-1" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "remote-1" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Legacy", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Legacy" + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "remote-1" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/remote/golden/remote_lineage_golden.json b/metadata-ingestion/tests/integration/remote/golden/remote_lineage_golden.json index 085e2e7b33a7e..4297be4eac62f 100644 --- a/metadata-ingestion/tests/integration/remote/golden/remote_lineage_golden.json +++ b/metadata-ingestion/tests/integration/remote/golden/remote_lineage_golden.json @@ -6,7 +6,6 @@ "aspectName": "upstreamLineage", "aspect": { "json": { - "fineGrainedLineages": [], "upstreams": [ { "auditStamp": { @@ -24,7 +23,8 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:kafka,topic1,DEV)", "type": "TRANSFORMED" } - ] + ], + "fineGrainedLineages": [] } }, "systemMetadata": { @@ -39,7 +39,6 @@ "aspectName": "upstreamLineage", "aspect": { "json": { - "fineGrainedLineages": [], "upstreams": [ { "auditStamp": { @@ -49,7 +48,38 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test.kafka.topic2,PROD)", "type": "TRANSFORMED" } - ] + ], + "fineGrainedLineages": [] + } + }, + "systemMetadata": { + "lastObserved": 
1629795600000, + "runId": "remote-3" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,topic2,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "remote-3" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,topic3,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false } }, "systemMetadata": { @@ -57,4 +87,4 @@ "runId": "remote-3" } } -] +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json index 818a8b7a5adde..d042c3fbb158b 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json @@ -211,7 +211,12 @@ "aspectName": "browsePathsV2", "aspect": { "json": { - "path": [] + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + } + ] } }, "systemMetadata": { @@ -311,6 +316,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -415,6 +424,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -523,6 +536,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -635,6 +652,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -751,6 +772,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -871,6 +896,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -995,6 +1024,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": 
"urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -1123,6 +1156,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -1201,6 +1238,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json index 7fc7e79a643b7..8e4fcb80ff855 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json @@ -523,7 +523,12 @@ "aspectName": "browsePathsV2", "aspect": { "json": { - "path": [] + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + } + ] } }, "systemMetadata": { @@ -623,6 +628,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -727,6 +736,10 @@ "aspect": { "json": { "path": [ + { + "id": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -835,6 +848,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -947,6 +964,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -1063,6 +1084,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -1183,6 +1208,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -1307,6 +1336,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": 
"urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -2663,6 +2696,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -3196,6 +3233,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -3745,6 +3786,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -3953,6 +3998,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -4456,6 +4505,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -4818,6 +4871,10 @@ "aspect": { "json": { "path": [ + { + "id": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" @@ -7489,6 +7546,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + }, { "id": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "urn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0" diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json index cb7ccf87f7c6f..58b81065c190f 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json @@ -211,7 +211,12 @@ "aspectName": "browsePathsV2", "aspect": { "json": { - "path": [] + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + } + ] } }, "systemMetadata": { @@ -311,6 +316,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -415,6 +424,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -523,6 +536,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -635,6 +652,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -697,6 +718,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json index 40e45150fa211..0c1d92ed58e3d 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json @@ -523,7 +523,12 @@ "aspectName": "browsePathsV2", "aspect": { "json": { - "path": [] + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + } + ] } }, "systemMetadata": 
{ @@ -623,6 +628,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -727,6 +736,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -835,6 +848,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -878,6 +895,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -1061,6 +1082,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -1244,6 +1269,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -1403,6 +1432,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -1586,6 +1619,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -1769,6 +1806,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" @@ -2264,6 +2305,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" diff --git a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json index 3b498707c4fb4..4e54f199eafed 100644 --- a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json +++ 
b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json @@ -1974,5 +1974,65 @@ "lastObserved": 1652353200000, "runId": "salesforce-test" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:salesforce,Account,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1652353200000, + "runId": "salesforce-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:salesforce,Property__c,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1652353200000, + "runId": "salesforce-test" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Custom", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Custom" + } + }, + "systemMetadata": { + "lastObserved": 1652353200000, + "runId": "salesforce-test" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:SystemField", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "SystemField" + } + }, + "systemMetadata": { + "lastObserved": 1652353200000, + "runId": "salesforce-test" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/starburst-trino-usage/trino_usages_golden.json b/metadata-ingestion/tests/integration/starburst-trino-usage/trino_usages_golden.json index 55fa1ff9a2dc1..f6c919edc2f45 100644 --- a/metadata-ingestion/tests/integration/starburst-trino-usage/trino_usages_golden.json +++ b/metadata-ingestion/tests/integration/starburst-trino-usage/trino_usages_golden.json @@ -1,21 +1,62 @@ [ { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,testcatalog.testschema.testtable,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": 
"datasetUsageStatistics", "aspect": { - "value": "{\"timestampMillis\": 1634169600000, \"eventGranularity\": {\"unit\": \"DAY\", \"multiple\": 1}, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"uniqueUserCount\": 1, \"totalSqlQueries\": 2, \"topSqlQueries\": [\"select * from testcatalog.testschema.testtable limit 100\"], \"userCounts\": [{\"user\": \"urn:li:corpuser:test-name\", \"count\": 2, \"userEmail\": \"test-name@acryl.io\"}], \"fieldCounts\": [{\"fieldPath\": \"column1\", \"count\": 2}, {\"fieldPath\": \"column2\", \"count\": 2}]}", - "contentType": "application/json" + "json": { + "timestampMillis": 1634169600000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "uniqueUserCount": 1, + "totalSqlQueries": 2, + "topSqlQueries": [ + "select * from testcatalog.testschema.testtable limit 100" + ], + "userCounts": [ + { + "user": "urn:li:corpuser:test-name", + "count": 2, + "userEmail": "test-name@acryl.io" + } + ], + "fieldCounts": [ + { + "fieldPath": "column1", + "count": 2 + }, + { + "fieldPath": "column2", + "count": 2 + } + ] + } }, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-trino-usage", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "test-trino-usage" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,testcatalog.testschema.testtable,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1629795600000, + "runId": "test-trino-usage" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/tableau/tableau_with_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_with_platform_instance_mces_golden.json index f82cf56ce2b81..65e74b1899069 100644 --- 
a/metadata-ingestion/tests/integration/tableau/tableau_with_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_with_platform_instance_mces_golden.json @@ -74,7 +74,12 @@ "aspectName": "browsePathsV2", "aspect": { "json": { - "path": [] + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + } + ] } }, "systemMetadata": { @@ -157,7 +162,12 @@ "aspectName": "browsePathsV2", "aspect": { "json": { - "path": [] + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + } + ] } }, "systemMetadata": { @@ -282,6 +292,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -430,6 +444,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -559,6 +577,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -688,6 +710,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -980,6 +1006,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -1542,6 +1572,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -2156,6 +2190,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -2666,6 +2704,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -2812,6 +2854,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -3126,6 +3172,10 @@ "aspect": { "json": { "path": [ + { + "id": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -3324,6 +3374,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -3826,6 +3880,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -4296,6 +4354,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -4798,6 +4860,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -5181,6 +5247,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": 
"urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -5544,6 +5614,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -5930,6 +6004,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -6371,6 +6449,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -6786,6 +6868,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -7117,6 +7203,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -7480,6 +7570,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -7733,6 +7827,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -8174,6 +8272,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -8511,6 +8613,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -8874,6 +8980,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -9263,6 +9373,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -9704,6 +9818,10 @@ "aspect": { "json": { "path": [ + { + "id": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -9893,6 +10011,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -10019,6 +10141,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -10122,6 +10248,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -10223,6 +10353,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -10338,6 +10472,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": 
"urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -12530,6 +12668,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -12927,6 +13069,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -13436,6 +13582,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -14682,6 +14832,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -21503,6 +21657,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -25653,6 +25811,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -31027,6 +31189,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -31441,6 +31607,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -31882,6 +32052,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -32938,6 +33112,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "Samples" } @@ -33157,6 +33335,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" @@ -33386,6 +33568,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -33581,6 +33767,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -33857,6 +34047,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -34107,6 +34301,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -34305,6 +34503,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -34490,6 +34692,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -34662,6 +34868,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -34782,6 +34992,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -34876,6 +35090,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "Samples" }, @@ -34967,6 +35185,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "Samples" }, @@ -35305,6 +35527,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "Samples" }, @@ -36178,6 +36404,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -37156,6 +37386,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -38212,6 +38446,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -39125,6 +39363,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -39479,6 +39721,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -40418,6 +40664,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -41513,6 +41763,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -42465,6 +42719,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -42511,6 +42769,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -42554,6 +42816,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, @@ -42596,6 +42862,10 @@ "aspect": { "json": { "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, { "id": "default" }, diff --git a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json index ceda91359b8d0..8163545155302 100644 --- a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json +++ b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json @@ -1,1815 +1,2095 @@ [ - { - "auditHeader": null, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test-2,some-feature-1)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": 
"TEXT", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,datahub-sagemaker-outputs,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:glue,sagemaker_featurestore.test-2-123412341234,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot": { - "urn": "urn:li:mlPrimaryKey:(test-2,some-feature-2)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": { - "description": null, - "dataType": "ORDINAL", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,datahub-sagemaker-outputs,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:glue,sagemaker_featurestore.test-2-123412341234,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test-2,some-feature-3)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "CONTINUOUS", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,datahub-sagemaker-outputs,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:glue,sagemaker_featurestore.test-2-123412341234,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { + "urn": "urn:li:mlFeature:(test-2,some-feature-1)", + "aspects": [ + { + "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { + "dataType": "TEXT", + "sources": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,datahub-sagemaker-outputs,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:glue,sagemaker_featurestore.test-2-123412341234,PROD)" + ] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - 
"com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot": { - "urn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:sagemaker,test-2)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/sagemaker" - ] - } - }, - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties": { - "customProperties": { - "arn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test-2", - "creation_time": "2021-06-24 09:48:37.035000", - "status": "Created" - }, - "description": "Yet another test feature group", - "mlFeatures": [ - "urn:li:mlFeature:(test-2,some-feature-1)", - "urn:li:mlFeature:(test-2,some-feature-3)" - ], - "mlPrimaryKeys": [ - "urn:li:mlPrimaryKey:(test-2,some-feature-2)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot": { + "urn": "urn:li:mlPrimaryKey:(test-2,some-feature-2)", + "aspects": [ + { + "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": { + "dataType": "ORDINAL", + "sources": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,datahub-sagemaker-outputs,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:glue,sagemaker_featurestore.test-2-123412341234,PROD)" + ] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test-1,name)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "TEXT", - "version": null, - "sources": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { + "urn": "urn:li:mlFeature:(test-2,some-feature-3)", + "aspects": [ + { + "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { + "dataType": "CONTINUOUS", + "sources": [ + 
"urn:li:dataset:(urn:li:dataPlatform:s3,datahub-sagemaker-outputs,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:glue,sagemaker_featurestore.test-2-123412341234,PROD)" + ] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot": { - "urn": "urn:li:mlPrimaryKey:(test-1,id)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": { - "description": null, - "dataType": "ORDINAL", - "version": null, - "sources": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot": { + "urn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:sagemaker,test-2)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/sagemaker" + ] + } + }, + { + "com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties": { + "customProperties": { + "arn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test-2", + "creation_time": "2021-06-24 09:48:37.035000", + "status": "Created" + }, + "description": "Yet another test feature group", + "mlFeatures": [ + "urn:li:mlFeature:(test-2,some-feature-1)", + "urn:li:mlFeature:(test-2,some-feature-3)" + ], + "mlPrimaryKeys": [ + "urn:li:mlPrimaryKey:(test-2,some-feature-2)" + ] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test-1,height)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "CONTINUOUS", - "version": null, - "sources": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { + "urn": "urn:li:mlFeature:(test-1,name)", + "aspects": [ + { + "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { + 
"dataType": "TEXT", + "sources": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test-1,time)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "TEXT", - "version": null, - "sources": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot": { + "urn": "urn:li:mlPrimaryKey:(test-1,id)", + "aspects": [ + { + "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": { + "dataType": "ORDINAL", + "sources": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot": { - "urn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:sagemaker,test-1)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/sagemaker" - ] - } - }, - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties": { - "customProperties": { - "arn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test-1", - "creation_time": "2021-06-23 13:58:10.264000", - "status": "Created" - }, - "description": "First test feature group", - "mlFeatures": [ - "urn:li:mlFeature:(test-1,name)", - "urn:li:mlFeature:(test-1,height)", - "urn:li:mlFeature:(test-1,time)" - ], - "mlPrimaryKeys": [ - "urn:li:mlPrimaryKey:(test-1,id)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { + "urn": "urn:li:mlFeature:(test-1,height)", + "aspects": [ + { + "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { + "dataType": "CONTINUOUS", + "sources": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot": { - "urn": 
"urn:li:mlPrimaryKey:(test,feature_1)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": { - "description": null, - "dataType": "TEXT", - "version": null, - "sources": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { + "urn": "urn:li:mlFeature:(test-1,time)", + "aspects": [ + { + "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { + "dataType": "TEXT", + "sources": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test,feature_2)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "ORDINAL", - "version": null, - "sources": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot": { + "urn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:sagemaker,test-1)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/sagemaker" + ] + } + }, + { + "com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties": { + "customProperties": { + "arn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test-1", + "creation_time": "2021-06-23 13:58:10.264000", + "status": "Created" + }, + "description": "First test feature group", + "mlFeatures": [ + "urn:li:mlFeature:(test-1,name)", + "urn:li:mlFeature:(test-1,height)", + "urn:li:mlFeature:(test-1,time)" + ], + "mlPrimaryKeys": [ + "urn:li:mlPrimaryKey:(test-1,id)" + ] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test,feature_3)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - 
"dataType": "CONTINUOUS", - "version": null, - "sources": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot": { + "urn": "urn:li:mlPrimaryKey:(test,feature_1)", + "aspects": [ + { + "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": { + "dataType": "TEXT", + "sources": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot": { - "urn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:sagemaker,test)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/sagemaker" - ] - } - }, - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties": { - "customProperties": { - "arn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test", - "creation_time": "2021-06-14 11:03:00.803000", - "status": "Created" - }, - "description": null, - "mlFeatures": [ - "urn:li:mlFeature:(test,feature_2)", - "urn:li:mlFeature:(test,feature_3)" - ], - "mlPrimaryKeys": [ - "urn:li:mlPrimaryKey:(test,feature_1)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { + "urn": "urn:li:mlFeature:(test,feature_2)", + "aspects": [ + { + "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { + "dataType": "ORDINAL", + "sources": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,auto-ml-job-input-bucket/file_txt,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://auto-ml-job-input-bucket/file.txt", - "datatype": "ManifestFile" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - 
} - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { + "urn": "urn:li:mlFeature:(test,feature_3)", + "aspects": [ + { + "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { + "dataType": "CONTINUOUS", + "sources": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,auto-ml-job-output-bucket/file_txt,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://auto-ml-job-output-bucket/file.txt" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot": { + "urn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:sagemaker,test)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/sagemaker" + ] + } + }, + { + "com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties": { + "customProperties": { + "arn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test", + "creation_time": "2021-06-14 11:03:00.803000", + "status": "Created" + }, + "mlFeatures": [ + "urn:li:mlFeature:(test,feature_2)", + "urn:li:mlFeature:(test,feature_3)" + ], + "mlPrimaryKeys": [ + "urn:li:mlPrimaryKey:(test,feature_1)" + ] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,compilation-job-bucket/input-config.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://compilation-job-bucket/input-config.tar.gz", - "framework": "TENSORFLOW", 
- "framework_version": "string" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,auto-ml-job-input-bucket/file_txt,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://auto-ml-job-input-bucket/file.txt", + "datatype": "ManifestFile" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,compilation-job-bucket/output-config.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://compilation-job-bucket/output-config.tar.gz", - "target_device": "lambda", - "target_platform": "{'Os': 'ANDROID', 'Arch': 'X86_64', 'Accelerator': 'INTEL_GRAPHICS'}" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,auto-ml-job-output-bucket/file_txt,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://auto-ml-job-output-bucket/file.txt" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,edge-packaging-bucket/model-artifact.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", 
- "uri": "s3://edge-packaging-bucket/model-artifact.tar.gz" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,compilation-job-bucket/input-config.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://compilation-job-bucket/input-config.tar.gz", + "framework": "TENSORFLOW", + "framework_version": "string" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,edge-packaging-bucket/output-config.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://edge-packaging-bucket/output-config.tar.gz" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,compilation-job-bucket/output-config.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://compilation-job-bucket/output-config.tar.gz", + "target_device": "lambda", + "target_platform": "{'Os': 'ANDROID', 'Arch': 'X86_64', 'Accelerator': 'INTEL_GRAPHICS'}" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/data-source.tar_gz,PROD)", - "aspects": [ - { - 
"com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://labeling-job/data-source.tar.gz" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,edge-packaging-bucket/model-artifact.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://edge-packaging-bucket/model-artifact.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/category-config.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://labeling-job/category-config.tar.gz" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,edge-packaging-bucket/output-config.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://edge-packaging-bucket/output-config.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/output-dataset.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": 
"s3://labeling-job/output-dataset.tar.gz" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/data-source.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://labeling-job/data-source.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/output-config.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://labeling-job/output-config.tar.gz" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/category-config.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://labeling-job/category-config.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,processing-job/input-data.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://processing-job/input-data.tar.gz", - "datatype": "ManifestFile", - "mode": "Pipe", - "distribution_type": "FullyReplicated", - "compression": "None", - "name": 
"string" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/output-dataset.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://labeling-job/output-dataset.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/input-dataset.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://training-job/input-dataset.tar.gz", - "datatype": "None", - "distribution_type": "FullyReplicated", - "attribute_names": "['string']", - "channel_name": "string" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/output-config.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://labeling-job/output-config.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/output-data.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://training-job/output-data.tar.gz" - }, - "externalUrl": null, - 
"description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,processing-job/input-data.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://processing-job/input-data.tar.gz", + "datatype": "ManifestFile", + "mode": "Pipe", + "distribution_type": "FullyReplicated", + "compression": "None", + "name": "string" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/checkpoint-config.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://training-job/checkpoint-config.tar.gz" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/input-dataset.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://training-job/input-dataset.tar.gz", + "datatype": "None", + "distribution_type": "FullyReplicated", + "attribute_names": "['string']", + "channel_name": "string" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/debug-hook-config.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": 
{ - "dataset_type": "s3", - "uri": "s3://training-job/debug-hook-config.tar.gz" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/output-data.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://training-job/output-data.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/tensorboard-output-config.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://training-job/tensorboard-output-config.tar.gz" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/checkpoint-config.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://training-job/checkpoint-config.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/profiler-config.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://training-job/profiler-config.tar.gz" - }, - "externalUrl": null, - "description": null, - 
"uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/debug-hook-config.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://training-job/debug-hook-config.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/debug-rule-config.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://training-job/debug-rule-config.tar.gz" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/tensorboard-output-config.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://training-job/tensorboard-output-config.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/profiler-rule-config.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://training-job/profiler-rule-config.tar.gz" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - 
"auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/profiler-config.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://training-job/profiler-config.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,transform-job/input-data-source.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://transform-job/input-data-source.tar.gz", - "datatype": "ManifestFile", - "compression": "None", - "split": "None" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/debug-rule-config.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://training-job/debug-rule-config.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,transform-job/output.tar_gz,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "dataset_type": "s3", - "uri": "s3://transform-job/output.tar.gz" - }, - "externalUrl": null, - "description": null, - "uri": null, - "tags": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + 
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/profiler-rule-config.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://training-job/profiler-rule-config.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { - "urn": "urn:li:dataFlow:(sagemaker,auto_ml:an-auto-ml-job,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { - "customProperties": {}, - "externalUrl": null, - "name": "an-auto-ml-job", - "description": null, - "project": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,transform-job/input-data-source.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://transform-job/input-data-source.tar.gz", + "datatype": "ManifestFile", + "compression": "None", + "split": "None" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { - "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,auto_ml:an-auto-ml-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataJobInfo": { - "customProperties": { - "AutoMLJobName": "an-auto-ml-job", - "AutoMLJobArn": "arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job", - "InputDataConfig": "[{'DataSource': {'S3DataSource': {'S3DataType': 'ManifestFile', 'S3Uri': 's3://auto-ml-job-input-bucket/file.txt'}}, 'CompressionType': 'None', 'TargetAttributeName': 'some-name'}]", - "OutputDataConfig": 
"{'KmsKeyId': 'some-key-id', 'S3OutputPath': 's3://auto-ml-job-output-bucket/file.txt'}", - "RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", - "AutoMLJobObjective": "{'MetricName': 'Accuracy'}", - "ProblemType": "BinaryClassification", - "AutoMLJobConfig": "{'CompletionCriteria': {'MaxCandidates': 123, 'MaxRuntimePerTrainingJobInSeconds': 123, 'MaxAutoMLJobRuntimeInSeconds': 123}, 'SecurityConfig': {'VolumeKmsKeyId': 'string', 'EnableInterContainerTrafficEncryption': True, 'VpcConfig': {'SecurityGroupIds': ['string'], 'Subnets': ['string']}}}", - "CreationTime": "2015-01-01 00:00:00+00:00", - "EndTime": "2015-01-01 00:00:00+00:00", - "LastModifiedTime": "2015-01-01 00:00:00+00:00", - "FailureReason": "string", - "PartialFailureReasons": "[{'PartialFailureMessage': 'string'}]", - "BestCandidate": "{'CandidateName': 'string', 'FinalAutoMLJobObjectiveMetric': {'Type': 'Maximize', 'MetricName': 'Accuracy', 'Value': 1.0}, 'ObjectiveStatus': 'Succeeded', 'CandidateSteps': [{'CandidateStepType': 'AWS::SageMaker::TrainingJob', 'CandidateStepArn': 'string', 'CandidateStepName': 'string'}], 'CandidateStatus': 'Completed', 'InferenceContainers': [{'Image': 'string', 'ModelDataUrl': 's3://auto-ml-job/model-artifact.tar.gz', 'Environment': {'string': 'string'}}], 'CreationTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'EndTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'LastModifiedTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'FailureReason': 'string', 'CandidateProperties': {'CandidateArtifactLocations': {'Explainability': 'string'}}}", - "AutoMLJobStatus": "Completed", - "AutoMLJobSecondaryStatus": "Starting", - "GenerateCandidateDefinitionsOnly": "True", - "AutoMLJobArtifacts": "{'CandidateDefinitionNotebookLocation': 'string', 'DataExplorationNotebookLocation': 'string'}", - "ResolvedAttributes": "{'AutoMLJobObjective': {'MetricName': 
'Accuracy'}, 'ProblemType': 'BinaryClassification', 'CompletionCriteria': {'MaxCandidates': 123, 'MaxRuntimePerTrainingJobInSeconds': 123, 'MaxAutoMLJobRuntimeInSeconds': 123}}", - "ModelDeployConfig": "{'AutoGenerateEndpointName': True, 'EndpointName': 'string'}", - "ModelDeployResult": "{'EndpointName': 'string'}", - "jobType": "auto_ml" - }, - "externalUrl": null, - "name": "an-auto-ml-job", - "description": null, - "type": { - "string": "SAGEMAKER" - }, - "flowUrn": null, - "status": "COMPLETED" - } - }, - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/auto_ml" - ] - } - }, - { - "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { - "inputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,auto-ml-job-input-bucket/file_txt,PROD)" - ], - "outputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,auto-ml-job-output-bucket/file_txt,PROD)" - ], - "inputDatajobs": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:s3,transform-job/output.tar_gz,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "dataset_type": "s3", + "uri": "s3://transform-job/output.tar.gz" + }, + "tags": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { - "urn": "urn:li:dataFlow:(sagemaker,compilation:a-compilation-job,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { - "customProperties": {}, - "externalUrl": null, - "name": "a-compilation-job", - "description": null, - "project": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { + "urn": "urn:li:dataFlow:(sagemaker,auto_ml:an-auto-ml-job,PROD)", + "aspects": [ + 
{ + "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { + "customProperties": {}, + "name": "an-auto-ml-job" + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { - "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,compilation:a-compilation-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:compilation-job/a-compilation-job)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataJobInfo": { - "customProperties": { - "CompilationJobName": "a-compilation-job", - "CompilationJobArn": "arn:aws:sagemaker:us-west-2:123412341234:compilation-job/a-compilation-job", - "CompilationJobStatus": "INPROGRESS", - "CompilationStartTime": "2015-01-01 00:00:00+00:00", - "CompilationEndTime": "2015-01-01 00:00:00+00:00", - "StoppingCondition": "{'MaxRuntimeInSeconds': 123, 'MaxWaitTimeInSeconds': 123}", - "InferenceImage": "string", - "CreationTime": "2015-01-01 00:00:00+00:00", - "LastModifiedTime": "2015-01-01 00:00:00+00:00", - "FailureReason": "string", - "ModelArtifacts": "{'S3ModelArtifacts': 's3://compilation-job-bucket/model-artifacts.tar.gz'}", - "ModelDigests": "{'ArtifactDigest': 'string'}", - "RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", - "InputConfig": "{'S3Uri': 's3://compilation-job-bucket/input-config.tar.gz', 'DataInputConfig': 'string', 'Framework': 'TENSORFLOW', 'FrameworkVersion': 'string'}", - "OutputConfig": "{'S3OutputLocation': 's3://compilation-job-bucket/output-config.tar.gz', 'TargetDevice': 'lambda', 'TargetPlatform': {'Os': 'ANDROID', 'Arch': 'X86_64', 'Accelerator': 'INTEL_GRAPHICS'}, 'CompilerOptions': 'string', 'KmsKeyId': 'string'}", - "VpcConfig": "{'SecurityGroupIds': ['string'], 'Subnets': ['string']}", - "jobType": "compilation" - }, - "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/compilation-jobs/a-compilation-job", - "name": "a-compilation-job", - "description": null, - 
"type": { - "string": "SAGEMAKER" - }, - "flowUrn": null, - "status": "IN_PROGRESS" - } - }, - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/compilation" - ] - } - }, - { - "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { - "inputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,compilation-job-bucket/input-config.tar_gz,PROD)" - ], - "outputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,compilation-job-bucket/output-config.tar_gz,PROD)" - ], - "inputDatajobs": [ - "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,compilation:a-compilation-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:compilation-job/a-compilation-job)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,auto_ml:an-auto-ml-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataJobInfo": { + "customProperties": { + "AutoMLJobName": "an-auto-ml-job", + "AutoMLJobArn": "arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job", + "InputDataConfig": "[{'DataSource': {'S3DataSource': {'S3DataType': 'ManifestFile', 'S3Uri': 's3://auto-ml-job-input-bucket/file.txt'}}, 'CompressionType': 'None', 'TargetAttributeName': 'some-name'}]", + "OutputDataConfig": "{'KmsKeyId': 'some-key-id', 'S3OutputPath': 's3://auto-ml-job-output-bucket/file.txt'}", + "RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", + "AutoMLJobObjective": "{'MetricName': 'Accuracy'}", + "ProblemType": "BinaryClassification", + "AutoMLJobConfig": "{'CompletionCriteria': {'MaxCandidates': 123, 'MaxRuntimePerTrainingJobInSeconds': 123, 'MaxAutoMLJobRuntimeInSeconds': 123}, 'SecurityConfig': {'VolumeKmsKeyId': 'string', 'EnableInterContainerTrafficEncryption': True, 'VpcConfig': 
{'SecurityGroupIds': ['string'], 'Subnets': ['string']}}}", + "CreationTime": "2015-01-01 00:00:00+00:00", + "EndTime": "2015-01-01 00:00:00+00:00", + "LastModifiedTime": "2015-01-01 00:00:00+00:00", + "FailureReason": "string", + "PartialFailureReasons": "[{'PartialFailureMessage': 'string'}]", + "BestCandidate": "{'CandidateName': 'string', 'FinalAutoMLJobObjectiveMetric': {'Type': 'Maximize', 'MetricName': 'Accuracy', 'Value': 1.0}, 'ObjectiveStatus': 'Succeeded', 'CandidateSteps': [{'CandidateStepType': 'AWS::SageMaker::TrainingJob', 'CandidateStepArn': 'string', 'CandidateStepName': 'string'}], 'CandidateStatus': 'Completed', 'InferenceContainers': [{'Image': 'string', 'ModelDataUrl': 's3://auto-ml-job/model-artifact.tar.gz', 'Environment': {'string': 'string'}}], 'CreationTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'EndTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'LastModifiedTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'FailureReason': 'string', 'CandidateProperties': {'CandidateArtifactLocations': {'Explainability': 'string'}}}", + "AutoMLJobStatus": "Completed", + "AutoMLJobSecondaryStatus": "Starting", + "GenerateCandidateDefinitionsOnly": "True", + "AutoMLJobArtifacts": "{'CandidateDefinitionNotebookLocation': 'string', 'DataExplorationNotebookLocation': 'string'}", + "ResolvedAttributes": "{'AutoMLJobObjective': {'MetricName': 'Accuracy'}, 'ProblemType': 'BinaryClassification', 'CompletionCriteria': {'MaxCandidates': 123, 'MaxRuntimePerTrainingJobInSeconds': 123, 'MaxAutoMLJobRuntimeInSeconds': 123}}", + "ModelDeployConfig": "{'AutoGenerateEndpointName': True, 'EndpointName': 'string'}", + "ModelDeployResult": "{'EndpointName': 'string'}", + "jobType": "auto_ml" + }, + "name": "an-auto-ml-job", + "type": { + "string": "SAGEMAKER" + }, + "status": "COMPLETED" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/auto_ml" + ] + } + }, + { + 
"com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,auto-ml-job-input-bucket/file_txt,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,auto-ml-job-output-bucket/file_txt,PROD)" + ], + "inputDatajobs": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { - "urn": "urn:li:dataFlow:(sagemaker,edge_packaging:an-edge-packaging-job,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { - "customProperties": {}, - "externalUrl": null, - "name": "an-edge-packaging-job", - "description": null, - "project": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { + "urn": "urn:li:dataFlow:(sagemaker,compilation:a-compilation-job,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { + "customProperties": {}, + "name": "a-compilation-job" + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { - "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,edge_packaging:an-edge-packaging-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:edge-packaging-job/an-edge-packaging-job)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataJobInfo": { - "customProperties": { - "EdgePackagingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:edge-packaging-job/an-edge-packaging-job", - "EdgePackagingJobName": "an-edge-packaging-job", - "CompilationJobName": "a-compilation-job", - "ModelName": "the-second-model", - "ModelVersion": "string", - "RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", - "OutputConfig": "{'S3OutputLocation': 's3://edge-packaging-bucket/output-config.tar.gz', 'KmsKeyId': 'string', 'PresetDeploymentType': 
'GreengrassV2Component', 'PresetDeploymentConfig': 'string'}", - "ResourceKey": "string", - "EdgePackagingJobStatus": "STARTING", - "EdgePackagingJobStatusMessage": "string", - "CreationTime": "2015-01-01 00:00:00+00:00", - "LastModifiedTime": "2015-01-01 00:00:00+00:00", - "ModelArtifact": "s3://edge-packaging-bucket/model-artifact.tar.gz", - "ModelSignature": "string", - "PresetDeploymentOutput": "{'Type': 'GreengrassV2Component', 'Artifact': 'arn:aws:sagemaker:us-west-2:123412341234:edge-packaging-job/some-artifact', 'Status': 'COMPLETED', 'StatusMessage': 'string'}", - "jobType": "edge_packaging" - }, - "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/edge-packaging-jobs/an-edge-packaging-job", - "name": "an-edge-packaging-job", - "description": null, - "type": { - "string": "SAGEMAKER" - }, - "flowUrn": null, - "status": "STARTING" - } - }, - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/edge_packaging" - ] - } - }, - { - "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { - "inputDatasets": [], - "outputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,edge-packaging-bucket/model-artifact.tar_gz,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:s3,edge-packaging-bucket/output-config.tar_gz,PROD)" - ], - "inputDatajobs": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,compilation:a-compilation-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:compilation-job/a-compilation-job)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataJobInfo": { + "customProperties": { + "CompilationJobName": "a-compilation-job", + "CompilationJobArn": "arn:aws:sagemaker:us-west-2:123412341234:compilation-job/a-compilation-job", + "CompilationJobStatus": "INPROGRESS", + "CompilationStartTime": "2015-01-01 00:00:00+00:00", + 
"CompilationEndTime": "2015-01-01 00:00:00+00:00", + "StoppingCondition": "{'MaxRuntimeInSeconds': 123, 'MaxWaitTimeInSeconds': 123}", + "InferenceImage": "string", + "CreationTime": "2015-01-01 00:00:00+00:00", + "LastModifiedTime": "2015-01-01 00:00:00+00:00", + "FailureReason": "string", + "ModelArtifacts": "{'S3ModelArtifacts': 's3://compilation-job-bucket/model-artifacts.tar.gz'}", + "ModelDigests": "{'ArtifactDigest': 'string'}", + "RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", + "InputConfig": "{'S3Uri': 's3://compilation-job-bucket/input-config.tar.gz', 'DataInputConfig': 'string', 'Framework': 'TENSORFLOW', 'FrameworkVersion': 'string'}", + "OutputConfig": "{'S3OutputLocation': 's3://compilation-job-bucket/output-config.tar.gz', 'TargetDevice': 'lambda', 'TargetPlatform': {'Os': 'ANDROID', 'Arch': 'X86_64', 'Accelerator': 'INTEL_GRAPHICS'}, 'CompilerOptions': 'string', 'KmsKeyId': 'string'}", + "VpcConfig": "{'SecurityGroupIds': ['string'], 'Subnets': ['string']}", + "jobType": "compilation" + }, + "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/compilation-jobs/a-compilation-job", + "name": "a-compilation-job", + "type": { + "string": "SAGEMAKER" + }, + "status": "IN_PROGRESS" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/compilation" + ] + } + }, + { + "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,compilation-job-bucket/input-config.tar_gz,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,compilation-job-bucket/output-config.tar_gz,PROD)" + ], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,compilation:a-compilation-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:compilation-job/a-compilation-job)" + ] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - 
"com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { - "urn": "urn:li:dataFlow:(sagemaker,hyper_parameter_tuning:a-hyper-parameter-tuning-job,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { - "customProperties": {}, - "externalUrl": null, - "name": "a-hyper-parameter-tuning-job", - "description": null, - "project": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { + "urn": "urn:li:dataFlow:(sagemaker,edge_packaging:an-edge-packaging-job,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { + "customProperties": {}, + "name": "an-edge-packaging-job" + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { - "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,hyper_parameter_tuning:a-hyper-parameter-tuning-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:hyper-parameter-tuning-job/a-hyper-parameter-tuning-job)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataJobInfo": { - "customProperties": { - "HyperParameterTuningJobName": "a-hyper-parameter-tuning-job", - "HyperParameterTuningJobArn": "arn:aws:sagemaker:us-west-2:123412341234:hyper-parameter-tuning-job/a-hyper-parameter-tuning-job", - "HyperParameterTuningJobConfig": "{'Strategy': 'Bayesian', 'HyperParameterTuningJobObjective': {'Type': 'Maximize', 'MetricName': 'string'}, 'ResourceLimits': {'MaxNumberOfTrainingJobs': 123, 'MaxParallelTrainingJobs': 123}, 'ParameterRanges': {'IntegerParameterRanges': [{'Name': 'string', 'MinValue': 'string', 'MaxValue': 'string', 'ScalingType': 'Auto'}], 'ContinuousParameterRanges': [{'Name': 'string', 'MinValue': 'string', 'MaxValue': 'string', 'ScalingType': 'Auto'}], 'CategoricalParameterRanges': [{'Name': 'string', 'Values': ['string']}]}, 'TrainingJobEarlyStoppingType': 'Off', 'TuningJobCompletionCriteria': 
{'TargetObjectiveMetricValue': 1.0}}", - "TrainingJobDefinition": "{'DefinitionName': 'string', 'TuningObjective': {'Type': 'Maximize', 'MetricName': 'string'}, 'HyperParameterRanges': {'IntegerParameterRanges': [{'Name': 'string', 'MinValue': 'string', 'MaxValue': 'string', 'ScalingType': 'Auto'}], 'ContinuousParameterRanges': [{'Name': 'string', 'MinValue': 'string', 'MaxValue': 'string', 'ScalingType': 'Auto'}], 'CategoricalParameterRanges': [{'Name': 'string', 'Values': ['string']}]}, 'StaticHyperParameters': {'string': 'string'}, 'AlgorithmSpecification': {'TrainingImage': 'string', 'TrainingInputMode': 'Pipe', 'AlgorithmName': 'string', 'MetricDefinitions': [{'Name': 'string', 'Regex': 'string'}]}, 'RoleArn': 'arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole', 'InputDataConfig': [{'ChannelName': 'string', 'DataSource': {'S3DataSource': {'S3DataType': 'ManifestFile', 'S3Uri': 's3://hyper-parameter-tuning-job/data-source.tar.gz', 'S3DataDistributionType': 'FullyReplicated', 'AttributeNames': ['string']}, 'FileSystemDataSource': {'FileSystemId': 'abcdefgihjklmnopqrstuvwxyz', 'FileSystemAccessMode': 'rw', 'FileSystemType': 'EFS', 'DirectoryPath': 'string'}}, 'ContentType': 'string', 'CompressionType': 'None', 'RecordWrapperType': 'None', 'InputMode': 'Pipe', 'ShuffleConfig': {'Seed': 123}}], 'VpcConfig': {'SecurityGroupIds': ['string'], 'Subnets': ['string']}, 'OutputDataConfig': {'KmsKeyId': 'string', 'S3OutputPath': 's3://hyper-parameter-tuning-job/data-output.tar.gz'}, 'ResourceConfig': {'InstanceType': 'ml.m4.xlarge', 'InstanceCount': 123, 'VolumeSizeInGB': 123, 'VolumeKmsKeyId': 'string'}, 'StoppingCondition': {'MaxRuntimeInSeconds': 123, 'MaxWaitTimeInSeconds': 123}, 'EnableNetworkIsolation': True, 'EnableInterContainerTrafficEncryption': True, 'EnableManagedSpotTraining': True, 'CheckpointConfig': {'S3Uri': 's3://hyper-parameter-tuning-job/checkpoint-config.tar.gz', 'LocalPath': 'string'}, 'RetryStrategy': 
{'MaximumRetryAttempts': 123}}", - "TrainingJobDefinitions": "[{'DefinitionName': 'string', 'TuningObjective': {'Type': 'Maximize', 'MetricName': 'string'}, 'HyperParameterRanges': {'IntegerParameterRanges': [{'Name': 'string', 'MinValue': 'string', 'MaxValue': 'string', 'ScalingType': 'Auto'}], 'ContinuousParameterRanges': [{'Name': 'string', 'MinValue': 'string', 'MaxValue': 'string', 'ScalingType': 'Auto'}], 'CategoricalParameterRanges': [{'Name': 'string', 'Values': ['string']}]}, 'StaticHyperParameters': {'string': 'string'}, 'AlgorithmSpecification': {'TrainingImage': 'string', 'TrainingInputMode': 'Pipe', 'AlgorithmName': 'string', 'MetricDefinitions': [{'Name': 'string', 'Regex': 'string'}]}, 'RoleArn': 'arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole', 'InputDataConfig': [{'ChannelName': 'string', 'DataSource': {'S3DataSource': {'S3DataType': 'ManifestFile', 'S3Uri': 's3://hyper-parameter-tuning-job/data-source.tar.gz', 'S3DataDistributionType': 'FullyReplicated', 'AttributeNames': ['string']}, 'FileSystemDataSource': {'FileSystemId': 'abcdefgihjklmnopqrstuvwxyz', 'FileSystemAccessMode': 'rw', 'FileSystemType': 'EFS', 'DirectoryPath': 'string'}}, 'ContentType': 'string', 'CompressionType': 'None', 'RecordWrapperType': 'None', 'InputMode': 'Pipe', 'ShuffleConfig': {'Seed': 123}}], 'VpcConfig': {'SecurityGroupIds': ['string'], 'Subnets': ['string']}, 'OutputDataConfig': {'KmsKeyId': 'string', 'S3OutputPath': 's3://hyper-parameter-tuning-job/data-output.tar.gz'}, 'ResourceConfig': {'InstanceType': 'ml.m4.xlarge', 'InstanceCount': 123, 'VolumeSizeInGB': 123, 'VolumeKmsKeyId': 'string'}, 'StoppingCondition': {'MaxRuntimeInSeconds': 123, 'MaxWaitTimeInSeconds': 123}, 'EnableNetworkIsolation': True, 'EnableInterContainerTrafficEncryption': True, 'EnableManagedSpotTraining': True, 'CheckpointConfig': {'S3Uri': 's3://hyper-parameter-tuning-job/checkpoint-config.tar.gz', 'LocalPath': 'string'}, 'RetryStrategy': 
{'MaximumRetryAttempts': 123}}]", - "HyperParameterTuningJobStatus": "Completed", - "CreationTime": "2015-01-01 00:00:00+00:00", - "HyperParameterTuningEndTime": "2015-01-01 00:00:00+00:00", - "LastModifiedTime": "2015-01-01 00:00:00+00:00", - "TrainingJobStatusCounters": "{'Completed': 123, 'InProgress': 123, 'RetryableError': 123, 'NonRetryableError': 123, 'Stopped': 123}", - "ObjectiveStatusCounters": "{'Succeeded': 123, 'Pending': 123, 'Failed': 123}", - "BestTrainingJob": "{'TrainingJobDefinitionName': 'string', 'TrainingJobName': 'string', 'TrainingJobArn': 'string', 'TuningJobName': 'string', 'CreationTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'TrainingStartTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'TrainingEndTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'TrainingJobStatus': 'InProgress', 'TunedHyperParameters': {'string': 'string'}, 'FailureReason': 'string', 'FinalHyperParameterTuningJobObjectiveMetric': {'Type': 'Maximize', 'MetricName': 'string', 'Value': 1.0}, 'ObjectiveStatus': 'Succeeded'}", - "OverallBestTrainingJob": "{'TrainingJobDefinitionName': 'string', 'TrainingJobName': 'string', 'TrainingJobArn': 'string', 'TuningJobName': 'string', 'CreationTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'TrainingStartTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'TrainingEndTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'TrainingJobStatus': 'InProgress', 'TunedHyperParameters': {'string': 'string'}, 'FailureReason': 'string', 'FinalHyperParameterTuningJobObjectiveMetric': {'Type': 'Maximize', 'MetricName': 'string', 'Value': 1.0}, 'ObjectiveStatus': 'Succeeded'}", - "WarmStartConfig": "{'ParentHyperParameterTuningJobs': [{'HyperParameterTuningJobName': 'string'}], 'WarmStartType': 'IdenticalDataAndAlgorithm'}", - "FailureReason": "string", - "jobType": "hyper_parameter_tuning" - }, - 
"externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/hyper-tuning-jobs/a-hyper-parameter-tuning-job", - "name": "a-hyper-parameter-tuning-job", - "description": null, - "type": { - "string": "SAGEMAKER" - }, - "flowUrn": null, - "status": "COMPLETED" - } - }, - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/hyper_parameter_tuning" - ] - } - }, - { - "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,edge_packaging:an-edge-packaging-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:edge-packaging-job/an-edge-packaging-job)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataJobInfo": { + "customProperties": { + "EdgePackagingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:edge-packaging-job/an-edge-packaging-job", + "EdgePackagingJobName": "an-edge-packaging-job", + "CompilationJobName": "a-compilation-job", + "ModelName": "the-second-model", + "ModelVersion": "string", + "RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", + "OutputConfig": "{'S3OutputLocation': 's3://edge-packaging-bucket/output-config.tar.gz', 'KmsKeyId': 'string', 'PresetDeploymentType': 'GreengrassV2Component', 'PresetDeploymentConfig': 'string'}", + "ResourceKey": "string", + "EdgePackagingJobStatus": "STARTING", + "EdgePackagingJobStatusMessage": "string", + "CreationTime": "2015-01-01 00:00:00+00:00", + "LastModifiedTime": "2015-01-01 00:00:00+00:00", + "ModelArtifact": "s3://edge-packaging-bucket/model-artifact.tar.gz", + "ModelSignature": "string", + "PresetDeploymentOutput": "{'Type': 'GreengrassV2Component', 'Artifact': 
'arn:aws:sagemaker:us-west-2:123412341234:edge-packaging-job/some-artifact', 'Status': 'COMPLETED', 'StatusMessage': 'string'}", + "jobType": "edge_packaging" + }, + "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/edge-packaging-jobs/an-edge-packaging-job", + "name": "an-edge-packaging-job", + "type": { + "string": "SAGEMAKER" + }, + "status": "STARTING" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/edge_packaging" + ] + } + }, + { + "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { + "inputDatasets": [], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,edge-packaging-bucket/model-artifact.tar_gz,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:s3,edge-packaging-bucket/output-config.tar_gz,PROD)" + ], + "inputDatajobs": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { - "urn": "urn:li:dataFlow:(sagemaker,labeling:a-labeling-job,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { - "customProperties": {}, - "externalUrl": null, - "name": "a-labeling-job", - "description": null, - "project": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { + "urn": "urn:li:dataFlow:(sagemaker,hyper_parameter_tuning:a-hyper-parameter-tuning-job,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { + "customProperties": {}, + "name": "a-hyper-parameter-tuning-job" + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { - "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,labeling:a-labeling-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:labeling-job/a-labeling-job)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataJobInfo": { - "customProperties": { - 
"LabelingJobStatus": "Initializing", - "LabelCounters": "{'TotalLabeled': 123, 'HumanLabeled': 123, 'MachineLabeled': 123, 'FailedNonRetryableError': 123, 'Unlabeled': 123}", - "FailureReason": "string", - "CreationTime": "2015-01-01 00:00:00+00:00", - "LastModifiedTime": "2015-01-01 00:00:00+00:00", - "JobReferenceCode": "string", - "LabelingJobName": "a-labeling-job", - "LabelingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:labeling-job/a-labeling-job", - "LabelAttributeName": "string", - "InputConfig": "{'DataSource': {'S3DataSource': {'ManifestS3Uri': 's3://labeling-job/data-source.tar.gz'}, 'SnsDataSource': {'SnsTopicArn': 'string'}}, 'DataAttributes': {'ContentClassifiers': ['FreeOfPersonallyIdentifiableInformation', 'FreeOfAdultContent']}}", - "OutputConfig": "{'S3OutputPath': 's3://labeling-job/output-config.tar.gz', 'KmsKeyId': 'string', 'SnsTopicArn': 'string'}", - "RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", - "LabelCategoryConfigS3Uri": "s3://labeling-job/category-config.tar.gz", - "StoppingConditions": "{'MaxHumanLabeledObjectCount': 123, 'MaxPercentageOfInputDatasetLabeled': 123}", - "LabelingJobAlgorithmsConfig": "{'LabelingJobAlgorithmSpecificationArn': 'string', 'InitialActiveLearningModelArn': 'arn:aws:sagemaker:us-west-2:123412341234:labeling-job/initial-active-learning-model', 'LabelingJobResourceConfig': {'VolumeKmsKeyId': 'string'}}", - "HumanTaskConfig": "{'WorkteamArn': 'string', 'UiConfig': {'UiTemplateS3Uri': 's3://labeling-job/ui-config.tar.gz', 'HumanTaskUiArn': 'string'}, 'PreHumanTaskLambdaArn': 'string', 'TaskKeywords': ['string'], 'TaskTitle': 'string', 'TaskDescription': 'string', 'NumberOfHumanWorkersPerDataObject': 123, 'TaskTimeLimitInSeconds': 123, 'TaskAvailabilityLifetimeInSeconds': 123, 'MaxConcurrentTaskCount': 123, 'AnnotationConsolidationConfig': {'AnnotationConsolidationLambdaArn': 'string'}, 'PublicWorkforceTaskPrice': {'AmountInUsd': {'Dollars': 123, 
'Cents': 123, 'TenthFractionsOfACent': 123}}}", - "Tags": "[{'Key': 'string', 'Value': 'string'}]", - "LabelingJobOutput": "{'OutputDatasetS3Uri': 's3://labeling-job/output-dataset.tar.gz', 'FinalActiveLearningModelArn': 'arn:aws:sagemaker:us-west-2:123412341234:labeling-job/final-active-learning-model'}", - "jobType": "labeling" - }, - "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/labeling-jobs/a-labeling-job", - "name": "a-labeling-job", - "description": null, - "type": { - "string": "SAGEMAKER" - }, - "flowUrn": null, - "status": "STARTING" - } - }, - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/labeling" - ] - } - }, - { - "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { - "inputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/category-config.tar_gz,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/data-source.tar_gz,PROD)" - ], - "outputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/output-config.tar_gz,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/output-dataset.tar_gz,PROD)" - ], - "inputDatajobs": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,hyper_parameter_tuning:a-hyper-parameter-tuning-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:hyper-parameter-tuning-job/a-hyper-parameter-tuning-job)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataJobInfo": { + "customProperties": { + "HyperParameterTuningJobName": "a-hyper-parameter-tuning-job", + "HyperParameterTuningJobArn": "arn:aws:sagemaker:us-west-2:123412341234:hyper-parameter-tuning-job/a-hyper-parameter-tuning-job", + "HyperParameterTuningJobConfig": "{'Strategy': 'Bayesian', 'HyperParameterTuningJobObjective': {'Type': 'Maximize', 'MetricName': 'string'}, 
'ResourceLimits': {'MaxNumberOfTrainingJobs': 123, 'MaxParallelTrainingJobs': 123}, 'ParameterRanges': {'IntegerParameterRanges': [{'Name': 'string', 'MinValue': 'string', 'MaxValue': 'string', 'ScalingType': 'Auto'}], 'ContinuousParameterRanges': [{'Name': 'string', 'MinValue': 'string', 'MaxValue': 'string', 'ScalingType': 'Auto'}], 'CategoricalParameterRanges': [{'Name': 'string', 'Values': ['string']}]}, 'TrainingJobEarlyStoppingType': 'Off', 'TuningJobCompletionCriteria': {'TargetObjectiveMetricValue': 1.0}}", + "TrainingJobDefinition": "{'DefinitionName': 'string', 'TuningObjective': {'Type': 'Maximize', 'MetricName': 'string'}, 'HyperParameterRanges': {'IntegerParameterRanges': [{'Name': 'string', 'MinValue': 'string', 'MaxValue': 'string', 'ScalingType': 'Auto'}], 'ContinuousParameterRanges': [{'Name': 'string', 'MinValue': 'string', 'MaxValue': 'string', 'ScalingType': 'Auto'}], 'CategoricalParameterRanges': [{'Name': 'string', 'Values': ['string']}]}, 'StaticHyperParameters': {'string': 'string'}, 'AlgorithmSpecification': {'TrainingImage': 'string', 'TrainingInputMode': 'Pipe', 'AlgorithmName': 'string', 'MetricDefinitions': [{'Name': 'string', 'Regex': 'string'}]}, 'RoleArn': 'arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole', 'InputDataConfig': [{'ChannelName': 'string', 'DataSource': {'S3DataSource': {'S3DataType': 'ManifestFile', 'S3Uri': 's3://hyper-parameter-tuning-job/data-source.tar.gz', 'S3DataDistributionType': 'FullyReplicated', 'AttributeNames': ['string']}, 'FileSystemDataSource': {'FileSystemId': 'abcdefgihjklmnopqrstuvwxyz', 'FileSystemAccessMode': 'rw', 'FileSystemType': 'EFS', 'DirectoryPath': 'string'}}, 'ContentType': 'string', 'CompressionType': 'None', 'RecordWrapperType': 'None', 'InputMode': 'Pipe', 'ShuffleConfig': {'Seed': 123}}], 'VpcConfig': {'SecurityGroupIds': ['string'], 'Subnets': ['string']}, 'OutputDataConfig': {'KmsKeyId': 'string', 'S3OutputPath': 
's3://hyper-parameter-tuning-job/data-output.tar.gz'}, 'ResourceConfig': {'InstanceType': 'ml.m4.xlarge', 'InstanceCount': 123, 'VolumeSizeInGB': 123, 'VolumeKmsKeyId': 'string'}, 'StoppingCondition': {'MaxRuntimeInSeconds': 123, 'MaxWaitTimeInSeconds': 123}, 'EnableNetworkIsolation': True, 'EnableInterContainerTrafficEncryption': True, 'EnableManagedSpotTraining': True, 'CheckpointConfig': {'S3Uri': 's3://hyper-parameter-tuning-job/checkpoint-config.tar.gz', 'LocalPath': 'string'}, 'RetryStrategy': {'MaximumRetryAttempts': 123}}", + "TrainingJobDefinitions": "[{'DefinitionName': 'string', 'TuningObjective': {'Type': 'Maximize', 'MetricName': 'string'}, 'HyperParameterRanges': {'IntegerParameterRanges': [{'Name': 'string', 'MinValue': 'string', 'MaxValue': 'string', 'ScalingType': 'Auto'}], 'ContinuousParameterRanges': [{'Name': 'string', 'MinValue': 'string', 'MaxValue': 'string', 'ScalingType': 'Auto'}], 'CategoricalParameterRanges': [{'Name': 'string', 'Values': ['string']}]}, 'StaticHyperParameters': {'string': 'string'}, 'AlgorithmSpecification': {'TrainingImage': 'string', 'TrainingInputMode': 'Pipe', 'AlgorithmName': 'string', 'MetricDefinitions': [{'Name': 'string', 'Regex': 'string'}]}, 'RoleArn': 'arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole', 'InputDataConfig': [{'ChannelName': 'string', 'DataSource': {'S3DataSource': {'S3DataType': 'ManifestFile', 'S3Uri': 's3://hyper-parameter-tuning-job/data-source.tar.gz', 'S3DataDistributionType': 'FullyReplicated', 'AttributeNames': ['string']}, 'FileSystemDataSource': {'FileSystemId': 'abcdefgihjklmnopqrstuvwxyz', 'FileSystemAccessMode': 'rw', 'FileSystemType': 'EFS', 'DirectoryPath': 'string'}}, 'ContentType': 'string', 'CompressionType': 'None', 'RecordWrapperType': 'None', 'InputMode': 'Pipe', 'ShuffleConfig': {'Seed': 123}}], 'VpcConfig': {'SecurityGroupIds': ['string'], 'Subnets': ['string']}, 'OutputDataConfig': {'KmsKeyId': 'string', 'S3OutputPath': 
's3://hyper-parameter-tuning-job/data-output.tar.gz'}, 'ResourceConfig': {'InstanceType': 'ml.m4.xlarge', 'InstanceCount': 123, 'VolumeSizeInGB': 123, 'VolumeKmsKeyId': 'string'}, 'StoppingCondition': {'MaxRuntimeInSeconds': 123, 'MaxWaitTimeInSeconds': 123}, 'EnableNetworkIsolation': True, 'EnableInterContainerTrafficEncryption': True, 'EnableManagedSpotTraining': True, 'CheckpointConfig': {'S3Uri': 's3://hyper-parameter-tuning-job/checkpoint-config.tar.gz', 'LocalPath': 'string'}, 'RetryStrategy': {'MaximumRetryAttempts': 123}}]", + "HyperParameterTuningJobStatus": "Completed", + "CreationTime": "2015-01-01 00:00:00+00:00", + "HyperParameterTuningEndTime": "2015-01-01 00:00:00+00:00", + "LastModifiedTime": "2015-01-01 00:00:00+00:00", + "TrainingJobStatusCounters": "{'Completed': 123, 'InProgress': 123, 'RetryableError': 123, 'NonRetryableError': 123, 'Stopped': 123}", + "ObjectiveStatusCounters": "{'Succeeded': 123, 'Pending': 123, 'Failed': 123}", + "BestTrainingJob": "{'TrainingJobDefinitionName': 'string', 'TrainingJobName': 'string', 'TrainingJobArn': 'string', 'TuningJobName': 'string', 'CreationTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'TrainingStartTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'TrainingEndTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'TrainingJobStatus': 'InProgress', 'TunedHyperParameters': {'string': 'string'}, 'FailureReason': 'string', 'FinalHyperParameterTuningJobObjectiveMetric': {'Type': 'Maximize', 'MetricName': 'string', 'Value': 1.0}, 'ObjectiveStatus': 'Succeeded'}", + "OverallBestTrainingJob": "{'TrainingJobDefinitionName': 'string', 'TrainingJobName': 'string', 'TrainingJobArn': 'string', 'TuningJobName': 'string', 'CreationTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'TrainingStartTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'TrainingEndTime': datetime.datetime(2015, 1, 1, 0, 0, 
tzinfo=datetime.timezone.utc), 'TrainingJobStatus': 'InProgress', 'TunedHyperParameters': {'string': 'string'}, 'FailureReason': 'string', 'FinalHyperParameterTuningJobObjectiveMetric': {'Type': 'Maximize', 'MetricName': 'string', 'Value': 1.0}, 'ObjectiveStatus': 'Succeeded'}", + "WarmStartConfig": "{'ParentHyperParameterTuningJobs': [{'HyperParameterTuningJobName': 'string'}], 'WarmStartType': 'IdenticalDataAndAlgorithm'}", + "FailureReason": "string", + "jobType": "hyper_parameter_tuning" + }, + "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/hyper-tuning-jobs/a-hyper-parameter-tuning-job", + "name": "a-hyper-parameter-tuning-job", + "type": { + "string": "SAGEMAKER" + }, + "status": "COMPLETED" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/hyper_parameter_tuning" + ] + } + }, + { + "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { + "inputDatasets": [], + "outputDatasets": [], + "inputDatajobs": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { - "urn": "urn:li:dataFlow:(sagemaker,processing:a-processing-job,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { - "customProperties": {}, - "externalUrl": null, - "name": "a-processing-job", - "description": null, - "project": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { + "urn": "urn:li:dataFlow:(sagemaker,labeling:a-labeling-job,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { + "customProperties": {}, + "name": "a-labeling-job" + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { - "urn": 
"urn:li:dataJob:(urn:li:dataFlow:(sagemaker,processing:a-processing-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:processing-job/a-processing-job)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataJobInfo": { - "customProperties": { - "ProcessingJobName": "a-processing-job", - "ProcessingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:processing-job/a-processing-job", - "ProcessingInputs": "[{'InputName': 'string', 'AppManaged': True, 'S3Input': {'S3Uri': 's3://processing-job/input-data.tar.gz', 'LocalPath': 'string', 'S3DataType': 'ManifestFile', 'S3InputMode': 'Pipe', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}, 'DatasetDefinition': {'AthenaDatasetDefinition': {'Catalog': 'athena-catalog', 'Database': 'athena-database', 'QueryString': 'athena-query-string', 'WorkGroup': 'athena-work-group', 'OutputS3Uri': 's3://processing-job/athena-output.tar.gz', 'KmsKeyId': 'string', 'OutputFormat': 'PARQUET', 'OutputCompression': 'GZIP'}, 'RedshiftDatasetDefinition': {'ClusterId': 'redshift-cluster', 'Database': 'redshift-database', 'DbUser': 'redshift-db-user', 'QueryString': 'redshift-query-string', 'ClusterRoleArn': 'arn:aws:sagemaker:us-west-2:123412341234:processing-job/redshift-cluster', 'OutputS3Uri': 's3://processing-job/redshift-output.tar.gz', 'KmsKeyId': 'string', 'OutputFormat': 'PARQUET', 'OutputCompression': 'None'}, 'LocalPath': 'string', 'DataDistributionType': 'FullyReplicated', 'InputMode': 'Pipe'}}]", - "ProcessingOutputConfig": "{'Outputs': [{'OutputName': 'string', 'S3Output': {'S3Uri': 's3://processing-job/processing-output.tar.gz', 'LocalPath': 'string', 'S3UploadMode': 'Continuous'}, 'FeatureStoreOutput': {'FeatureGroupName': 'string'}, 'AppManaged': True}], 'KmsKeyId': 'string'}", - "ProcessingResources": "{'ClusterConfig': {'InstanceCount': 123, 'InstanceType': 'ml.t3.medium', 'VolumeSizeInGB': 123, 'VolumeKmsKeyId': 'string'}}", - "StoppingCondition": "{'MaxRuntimeInSeconds': 123}", - 
"AppSpecification": "{'ImageUri': 'string', 'ContainerEntrypoint': ['string'], 'ContainerArguments': ['string']}", - "Environment": "{'string': 'string'}", - "NetworkConfig": "{'EnableInterContainerTrafficEncryption': True, 'EnableNetworkIsolation': True, 'VpcConfig': {'SecurityGroupIds': ['string'], 'Subnets': ['string']}}", - "RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", - "ExperimentConfig": "{'ExperimentName': 'string', 'TrialName': 'string', 'TrialComponentDisplayName': 'string'}", - "ProcessingJobStatus": "InProgress", - "ExitMessage": "string", - "FailureReason": "string", - "ProcessingEndTime": "2015-01-01 00:00:00+00:00", - "ProcessingStartTime": "2015-01-01 00:00:00+00:00", - "LastModifiedTime": "2015-01-01 00:00:00+00:00", - "CreationTime": "2015-01-01 00:00:00+00:00", - "MonitoringScheduleArn": "string", - "AutoMLJobArn": "arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job", - "TrainingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job", - "jobType": "processing" - }, - "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/processing-jobs/a-processing-job", - "name": "a-processing-job", - "description": null, - "type": { - "string": "SAGEMAKER" - }, - "flowUrn": null, - "status": "IN_PROGRESS" - } - }, - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/processing" - ] - } - }, - { - "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { - "inputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,processing-job/input-data.tar_gz,PROD)" - ], - "outputDatasets": [], - "inputDatajobs": [ - "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,auto_ml:an-auto-ml-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job)", - "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,training:a-training-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job)" - ] - } - } - ] - } 
- }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,labeling:a-labeling-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:labeling-job/a-labeling-job)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataJobInfo": { + "customProperties": { + "LabelingJobStatus": "Initializing", + "LabelCounters": "{'TotalLabeled': 123, 'HumanLabeled': 123, 'MachineLabeled': 123, 'FailedNonRetryableError': 123, 'Unlabeled': 123}", + "FailureReason": "string", + "CreationTime": "2015-01-01 00:00:00+00:00", + "LastModifiedTime": "2015-01-01 00:00:00+00:00", + "JobReferenceCode": "string", + "LabelingJobName": "a-labeling-job", + "LabelingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:labeling-job/a-labeling-job", + "LabelAttributeName": "string", + "InputConfig": "{'DataSource': {'S3DataSource': {'ManifestS3Uri': 's3://labeling-job/data-source.tar.gz'}, 'SnsDataSource': {'SnsTopicArn': 'string'}}, 'DataAttributes': {'ContentClassifiers': ['FreeOfPersonallyIdentifiableInformation', 'FreeOfAdultContent']}}", + "OutputConfig": "{'S3OutputPath': 's3://labeling-job/output-config.tar.gz', 'KmsKeyId': 'string', 'SnsTopicArn': 'string'}", + "RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", + "LabelCategoryConfigS3Uri": "s3://labeling-job/category-config.tar.gz", + "StoppingConditions": "{'MaxHumanLabeledObjectCount': 123, 'MaxPercentageOfInputDatasetLabeled': 123}", + "LabelingJobAlgorithmsConfig": "{'LabelingJobAlgorithmSpecificationArn': 'string', 'InitialActiveLearningModelArn': 'arn:aws:sagemaker:us-west-2:123412341234:labeling-job/initial-active-learning-model', 'LabelingJobResourceConfig': {'VolumeKmsKeyId': 'string'}}", + "HumanTaskConfig": "{'WorkteamArn': 'string', 'UiConfig': {'UiTemplateS3Uri': 's3://labeling-job/ui-config.tar.gz', 'HumanTaskUiArn': 'string'}, 
'PreHumanTaskLambdaArn': 'string', 'TaskKeywords': ['string'], 'TaskTitle': 'string', 'TaskDescription': 'string', 'NumberOfHumanWorkersPerDataObject': 123, 'TaskTimeLimitInSeconds': 123, 'TaskAvailabilityLifetimeInSeconds': 123, 'MaxConcurrentTaskCount': 123, 'AnnotationConsolidationConfig': {'AnnotationConsolidationLambdaArn': 'string'}, 'PublicWorkforceTaskPrice': {'AmountInUsd': {'Dollars': 123, 'Cents': 123, 'TenthFractionsOfACent': 123}}}", + "Tags": "[{'Key': 'string', 'Value': 'string'}]", + "LabelingJobOutput": "{'OutputDatasetS3Uri': 's3://labeling-job/output-dataset.tar.gz', 'FinalActiveLearningModelArn': 'arn:aws:sagemaker:us-west-2:123412341234:labeling-job/final-active-learning-model'}", + "jobType": "labeling" + }, + "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/labeling-jobs/a-labeling-job", + "name": "a-labeling-job", + "type": { + "string": "SAGEMAKER" + }, + "status": "STARTING" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/labeling" + ] + } + }, + { + "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/category-config.tar_gz,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/data-source.tar_gz,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/output-config.tar_gz,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/output-dataset.tar_gz,PROD)" + ], + "inputDatajobs": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { - "urn": "urn:li:dataFlow:(sagemaker,training:a-training-job,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { - "customProperties": {}, - "externalUrl": null, - "name": "a-training-job", - "description": null, - "project": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - 
"auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { + "urn": "urn:li:dataFlow:(sagemaker,processing:a-processing-job,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { + "customProperties": {}, + "name": "a-processing-job" + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { - "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,training:a-training-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataJobInfo": { - "customProperties": { - "TrainingJobName": "a-training-job", - "TrainingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job", - "TuningJobArn": "string", - "LabelingJobArn": "string", - "AutoMLJobArn": "string", - "ModelArtifacts": "{'S3ModelArtifacts': 's3://the-first-model-data-url/data.tar.gz'}", - "TrainingJobStatus": "InProgress", - "SecondaryStatus": "Starting", - "FailureReason": "string", - "HyperParameters": "{'parameter-1': 'some-value', 'parameter-2': 'another-value'}", - "AlgorithmSpecification": "{'TrainingImage': 'string', 'AlgorithmName': 'string', 'TrainingInputMode': 'Pipe', 'MetricDefinitions': [{'Name': 'string', 'Regex': 'string'}], 'EnableSageMakerMetricsTimeSeries': True}", - "RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", - "InputDataConfig": "[{'ChannelName': 'string', 'DataSource': {'S3DataSource': {'S3DataType': 'ManifestFile', 'S3Uri': 's3://training-job/input-dataset.tar.gz', 'S3DataDistributionType': 'FullyReplicated', 'AttributeNames': ['string']}, 'FileSystemDataSource': {'FileSystemId': 'abcdefgihjklmnopqrstuvwxyz', 'FileSystemAccessMode': 'rw', 'FileSystemType': 'EFS', 'DirectoryPath': 'string'}}, 'ContentType': 'string', 'CompressionType': 'None', 'RecordWrapperType': 'None', 'InputMode': 'Pipe', 'ShuffleConfig': {'Seed': 
123}}]", - "OutputDataConfig": "{'KmsKeyId': 'string', 'S3OutputPath': 's3://training-job/output-data.tar.gz'}", - "ResourceConfig": "{'InstanceType': 'ml.m4.xlarge', 'InstanceCount': 123, 'VolumeSizeInGB': 123, 'VolumeKmsKeyId': 'string'}", - "VpcConfig": "{'SecurityGroupIds': ['string'], 'Subnets': ['string']}", - "StoppingCondition": "{'MaxRuntimeInSeconds': 123, 'MaxWaitTimeInSeconds': 123}", - "CreationTime": "2015-01-01 00:00:00+00:00", - "TrainingStartTime": "2015-01-01 00:00:00+00:00", - "TrainingEndTime": "2015-01-01 00:00:00+00:00", - "LastModifiedTime": "2015-01-01 00:00:00+00:00", - "SecondaryStatusTransitions": "[{'Status': 'Starting', 'StartTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'EndTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'StatusMessage': 'string'}]", - "FinalMetricDataList": "[{'MetricName': 'some-metric', 'Value': 1.0, 'Timestamp': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}, {'MetricName': 'another-metric', 'Value': 1.0, 'Timestamp': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}, {'MetricName': 'some-metric', 'Value': 0.0, 'Timestamp': datetime.datetime(2014, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}]", - "EnableNetworkIsolation": "True", - "EnableInterContainerTrafficEncryption": "True", - "EnableManagedSpotTraining": "True", - "CheckpointConfig": "{'S3Uri': 's3://training-job/checkpoint-config.tar.gz', 'LocalPath': 'string'}", - "TrainingTimeInSeconds": "123", - "BillableTimeInSeconds": "123", - "DebugHookConfig": "{'LocalPath': 'string', 'S3OutputPath': 's3://training-job/debug-hook-config.tar.gz', 'HookParameters': {'string': 'string'}, 'CollectionConfigurations': [{'CollectionName': 'string', 'CollectionParameters': {'string': 'string'}}]}", - "ExperimentConfig": "{'ExperimentName': 'string', 'TrialName': 'string', 'TrialComponentDisplayName': 'string'}", - "DebugRuleConfigurations": "[{'RuleConfigurationName': 'string', 
'LocalPath': 'string', 'S3OutputPath': 's3://training-job/debug-rule-config.tar.gz', 'RuleEvaluatorImage': 'string', 'InstanceType': 'ml.t3.medium', 'VolumeSizeInGB': 123, 'RuleParameters': {'string': 'string'}}]", - "TensorBoardOutputConfig": "{'LocalPath': 'string', 'S3OutputPath': 's3://training-job/tensorboard-output-config.tar.gz'}", - "DebugRuleEvaluationStatuses": "[{'RuleConfigurationName': 'string', 'RuleEvaluationJobArn': 'string', 'RuleEvaluationStatus': 'InProgress', 'StatusDetails': 'string', 'LastModifiedTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}]", - "ProfilerConfig": "{'S3OutputPath': 's3://training-job/profiler-config.tar.gz', 'ProfilingIntervalInMilliseconds': 123, 'ProfilingParameters': {'string': 'string'}}", - "ProfilerRuleConfigurations": "[{'RuleConfigurationName': 'string', 'LocalPath': 'string', 'S3OutputPath': 's3://training-job/profiler-rule-config.tar.gz', 'RuleEvaluatorImage': 'string', 'InstanceType': 'ml.t3.medium', 'VolumeSizeInGB': 123, 'RuleParameters': {'string': 'string'}}]", - "ProfilerRuleEvaluationStatuses": "[{'RuleConfigurationName': 'string', 'RuleEvaluationJobArn': 'string', 'RuleEvaluationStatus': 'InProgress', 'StatusDetails': 'string', 'LastModifiedTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}]", - "ProfilingStatus": "Enabled", - "RetryStrategy": "{'MaximumRetryAttempts': 123}", - "Environment": "{'string': 'string'}", - "jobType": "training" - }, - "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/jobs/a-training-job", - "name": "a-training-job", - "description": null, - "type": { - "string": "SAGEMAKER" - }, - "flowUrn": null, - "status": "IN_PROGRESS" - } - }, - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/training" - ] - } - }, - { - "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { - "inputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/input-dataset.tar_gz,PROD)" 
- ], - "outputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/checkpoint-config.tar_gz,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/debug-hook-config.tar_gz,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/debug-rule-config.tar_gz,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/output-data.tar_gz,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/profiler-config.tar_gz,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/profiler-rule-config.tar_gz,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/tensorboard-output-config.tar_gz,PROD)" - ], - "inputDatajobs": [] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,processing:a-processing-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:processing-job/a-processing-job)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataJobInfo": { + "customProperties": { + "ProcessingJobName": "a-processing-job", + "ProcessingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:processing-job/a-processing-job", + "ProcessingInputs": "[{'InputName': 'string', 'AppManaged': True, 'S3Input': {'S3Uri': 's3://processing-job/input-data.tar.gz', 'LocalPath': 'string', 'S3DataType': 'ManifestFile', 'S3InputMode': 'Pipe', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}, 'DatasetDefinition': {'AthenaDatasetDefinition': {'Catalog': 'athena-catalog', 'Database': 'athena-database', 'QueryString': 'athena-query-string', 'WorkGroup': 'athena-work-group', 'OutputS3Uri': 's3://processing-job/athena-output.tar.gz', 'KmsKeyId': 'string', 'OutputFormat': 'PARQUET', 'OutputCompression': 'GZIP'}, 'RedshiftDatasetDefinition': {'ClusterId': 'redshift-cluster', 'Database': 'redshift-database', 'DbUser': 'redshift-db-user', 'QueryString': 
'redshift-query-string', 'ClusterRoleArn': 'arn:aws:sagemaker:us-west-2:123412341234:processing-job/redshift-cluster', 'OutputS3Uri': 's3://processing-job/redshift-output.tar.gz', 'KmsKeyId': 'string', 'OutputFormat': 'PARQUET', 'OutputCompression': 'None'}, 'LocalPath': 'string', 'DataDistributionType': 'FullyReplicated', 'InputMode': 'Pipe'}}]", + "ProcessingOutputConfig": "{'Outputs': [{'OutputName': 'string', 'S3Output': {'S3Uri': 's3://processing-job/processing-output.tar.gz', 'LocalPath': 'string', 'S3UploadMode': 'Continuous'}, 'FeatureStoreOutput': {'FeatureGroupName': 'string'}, 'AppManaged': True}], 'KmsKeyId': 'string'}", + "ProcessingResources": "{'ClusterConfig': {'InstanceCount': 123, 'InstanceType': 'ml.t3.medium', 'VolumeSizeInGB': 123, 'VolumeKmsKeyId': 'string'}}", + "StoppingCondition": "{'MaxRuntimeInSeconds': 123}", + "AppSpecification": "{'ImageUri': 'string', 'ContainerEntrypoint': ['string'], 'ContainerArguments': ['string']}", + "Environment": "{'string': 'string'}", + "NetworkConfig": "{'EnableInterContainerTrafficEncryption': True, 'EnableNetworkIsolation': True, 'VpcConfig': {'SecurityGroupIds': ['string'], 'Subnets': ['string']}}", + "RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", + "ExperimentConfig": "{'ExperimentName': 'string', 'TrialName': 'string', 'TrialComponentDisplayName': 'string'}", + "ProcessingJobStatus": "InProgress", + "ExitMessage": "string", + "FailureReason": "string", + "ProcessingEndTime": "2015-01-01 00:00:00+00:00", + "ProcessingStartTime": "2015-01-01 00:00:00+00:00", + "LastModifiedTime": "2015-01-01 00:00:00+00:00", + "CreationTime": "2015-01-01 00:00:00+00:00", + "MonitoringScheduleArn": "string", + "AutoMLJobArn": "arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job", + "TrainingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job", + "jobType": "processing" + }, + "externalUrl": 
"https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/processing-jobs/a-processing-job", + "name": "a-processing-job", + "type": { + "string": "SAGEMAKER" + }, + "status": "IN_PROGRESS" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/processing" + ] + } + }, + { + "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,processing-job/input-data.tar_gz,PROD)" + ], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,auto_ml:an-auto-ml-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job)", + "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,training:a-training-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job)" + ] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { - "urn": "urn:li:dataFlow:(sagemaker,transform:a-transform-job,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { - "customProperties": {}, - "externalUrl": null, - "name": "a-transform-job", - "description": null, - "project": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { + "urn": "urn:li:dataFlow:(sagemaker,training:a-training-job,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { + "customProperties": {}, + "name": "a-training-job" + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { - "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,transform:a-transform-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:transform-job/a-transform-job)", - "aspects": [ - { - "com.linkedin.pegasus2avro.datajob.DataJobInfo": { - "customProperties": { - "TransformJobName": "a-transform-job", - 
"TransformJobArn": "arn:aws:sagemaker:us-west-2:123412341234:transform-job/a-transform-job", - "TransformJobStatus": "InProgress", - "FailureReason": "string", - "ModelName": "the-second-model", - "MaxConcurrentTransforms": "123", - "ModelClientConfig": "{'InvocationsTimeoutInSeconds': 123, 'InvocationsMaxRetries': 123}", - "MaxPayloadInMB": "123", - "BatchStrategy": "MultiRecord", - "Environment": "{'string': 'string'}", - "TransformInput": "{'DataSource': {'S3DataSource': {'S3DataType': 'ManifestFile', 'S3Uri': 's3://transform-job/input-data-source.tar.gz'}}, 'ContentType': 'string', 'CompressionType': 'None', 'SplitType': 'None'}", - "TransformOutput": "{'S3OutputPath': 's3://transform-job/output.tar.gz', 'Accept': 'string', 'AssembleWith': 'None', 'KmsKeyId': 'string'}", - "TransformResources": "{'InstanceType': 'ml.m4.xlarge', 'InstanceCount': 123, 'VolumeKmsKeyId': 'string'}", - "CreationTime": "2015-01-01 00:00:00+00:00", - "TransformStartTime": "2015-01-01 00:00:00+00:00", - "TransformEndTime": "2015-01-01 00:00:00+00:00", - "LabelingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:labeling-job/a-labeling-job", - "AutoMLJobArn": "arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job", - "DataProcessing": "{'InputFilter': 'string', 'OutputFilter': 'string', 'JoinSource': 'Input'}", - "ExperimentConfig": "{'ExperimentName': 'string', 'TrialName': 'string', 'TrialComponentDisplayName': 'string'}", - "jobType": "transform" - }, - "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/transform-jobs/a-transform-job", - "name": "a-transform-job", - "description": null, - "type": { - "string": "SAGEMAKER" - }, - "flowUrn": null, - "status": "IN_PROGRESS" - } - }, - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/transform" - ] - } - }, - { - "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { - "inputDatasets": [ - 
"urn:li:dataset:(urn:li:dataPlatform:s3,transform-job/input-data-source.tar_gz,PROD)" - ], - "outputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:s3,transform-job/output.tar_gz,PROD)" - ], - "inputDatajobs": [ - "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,auto_ml:an-auto-ml-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job)", - "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,labeling:a-labeling-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:labeling-job/a-labeling-job)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,training:a-training-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataJobInfo": { + "customProperties": { + "TrainingJobName": "a-training-job", + "TrainingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job", + "TuningJobArn": "string", + "LabelingJobArn": "string", + "AutoMLJobArn": "string", + "ModelArtifacts": "{'S3ModelArtifacts': 's3://the-first-model-data-url/data.tar.gz'}", + "TrainingJobStatus": "InProgress", + "SecondaryStatus": "Starting", + "FailureReason": "string", + "HyperParameters": "{'parameter-1': 'some-value', 'parameter-2': 'another-value'}", + "AlgorithmSpecification": "{'TrainingImage': 'string', 'AlgorithmName': 'string', 'TrainingInputMode': 'Pipe', 'MetricDefinitions': [{'Name': 'string', 'Regex': 'string'}], 'EnableSageMakerMetricsTimeSeries': True}", + "RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole", + "InputDataConfig": "[{'ChannelName': 'string', 'DataSource': {'S3DataSource': {'S3DataType': 'ManifestFile', 'S3Uri': 's3://training-job/input-dataset.tar.gz', 'S3DataDistributionType': 'FullyReplicated', 'AttributeNames': ['string']}, 
'FileSystemDataSource': {'FileSystemId': 'abcdefgihjklmnopqrstuvwxyz', 'FileSystemAccessMode': 'rw', 'FileSystemType': 'EFS', 'DirectoryPath': 'string'}}, 'ContentType': 'string', 'CompressionType': 'None', 'RecordWrapperType': 'None', 'InputMode': 'Pipe', 'ShuffleConfig': {'Seed': 123}}]", + "OutputDataConfig": "{'KmsKeyId': 'string', 'S3OutputPath': 's3://training-job/output-data.tar.gz'}", + "ResourceConfig": "{'InstanceType': 'ml.m4.xlarge', 'InstanceCount': 123, 'VolumeSizeInGB': 123, 'VolumeKmsKeyId': 'string'}", + "VpcConfig": "{'SecurityGroupIds': ['string'], 'Subnets': ['string']}", + "StoppingCondition": "{'MaxRuntimeInSeconds': 123, 'MaxWaitTimeInSeconds': 123}", + "CreationTime": "2015-01-01 00:00:00+00:00", + "TrainingStartTime": "2015-01-01 00:00:00+00:00", + "TrainingEndTime": "2015-01-01 00:00:00+00:00", + "LastModifiedTime": "2015-01-01 00:00:00+00:00", + "SecondaryStatusTransitions": "[{'Status': 'Starting', 'StartTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'EndTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), 'StatusMessage': 'string'}]", + "FinalMetricDataList": "[{'MetricName': 'some-metric', 'Value': 1.0, 'Timestamp': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}, {'MetricName': 'another-metric', 'Value': 1.0, 'Timestamp': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}, {'MetricName': 'some-metric', 'Value': 0.0, 'Timestamp': datetime.datetime(2014, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}]", + "EnableNetworkIsolation": "True", + "EnableInterContainerTrafficEncryption": "True", + "EnableManagedSpotTraining": "True", + "CheckpointConfig": "{'S3Uri': 's3://training-job/checkpoint-config.tar.gz', 'LocalPath': 'string'}", + "TrainingTimeInSeconds": "123", + "BillableTimeInSeconds": "123", + "DebugHookConfig": "{'LocalPath': 'string', 'S3OutputPath': 's3://training-job/debug-hook-config.tar.gz', 'HookParameters': {'string': 'string'}, 
'CollectionConfigurations': [{'CollectionName': 'string', 'CollectionParameters': {'string': 'string'}}]}", + "ExperimentConfig": "{'ExperimentName': 'string', 'TrialName': 'string', 'TrialComponentDisplayName': 'string'}", + "DebugRuleConfigurations": "[{'RuleConfigurationName': 'string', 'LocalPath': 'string', 'S3OutputPath': 's3://training-job/debug-rule-config.tar.gz', 'RuleEvaluatorImage': 'string', 'InstanceType': 'ml.t3.medium', 'VolumeSizeInGB': 123, 'RuleParameters': {'string': 'string'}}]", + "TensorBoardOutputConfig": "{'LocalPath': 'string', 'S3OutputPath': 's3://training-job/tensorboard-output-config.tar.gz'}", + "DebugRuleEvaluationStatuses": "[{'RuleConfigurationName': 'string', 'RuleEvaluationJobArn': 'string', 'RuleEvaluationStatus': 'InProgress', 'StatusDetails': 'string', 'LastModifiedTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}]", + "ProfilerConfig": "{'S3OutputPath': 's3://training-job/profiler-config.tar.gz', 'ProfilingIntervalInMilliseconds': 123, 'ProfilingParameters': {'string': 'string'}}", + "ProfilerRuleConfigurations": "[{'RuleConfigurationName': 'string', 'LocalPath': 'string', 'S3OutputPath': 's3://training-job/profiler-rule-config.tar.gz', 'RuleEvaluatorImage': 'string', 'InstanceType': 'ml.t3.medium', 'VolumeSizeInGB': 123, 'RuleParameters': {'string': 'string'}}]", + "ProfilerRuleEvaluationStatuses": "[{'RuleConfigurationName': 'string', 'RuleEvaluationJobArn': 'string', 'RuleEvaluationStatus': 'InProgress', 'StatusDetails': 'string', 'LastModifiedTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}]", + "ProfilingStatus": "Enabled", + "RetryStrategy": "{'MaximumRetryAttempts': 123}", + "Environment": "{'string': 'string'}", + "jobType": "training" + }, + "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/jobs/a-training-job", + "name": "a-training-job", + "type": { + "string": "SAGEMAKER" + }, + "status": "IN_PROGRESS" + } + }, + { + 
"com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/training" + ] + } + }, + { + "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/input-dataset.tar_gz,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/checkpoint-config.tar_gz,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/debug-hook-config.tar_gz,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/debug-rule-config.tar_gz,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/output-data.tar_gz,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/profiler-config.tar_gz,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/profiler-rule-config.tar_gz,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/tensorboard-output-config.tar_gz,PROD)" + ], + "inputDatajobs": [] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLModelDeploymentSnapshot": { - "urn": "urn:li:mlModelDeployment:(urn:li:dataPlatform:sagemaker,the-first-endpoint,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLModelDeploymentProperties": { - "customProperties": { - "EndpointArn": "arn:aws:sagemaker:us-west-2:123412341234:endpoint/the-first-endpoint", - "EndpointConfigName": "string", - "ProductionVariants": "[{'VariantName': 'string', 'DeployedImages': [{'SpecifiedImage': 'string', 'ResolvedImage': 'string', 'ResolutionTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}], 'CurrentWeight': 0.1, 'DesiredWeight': 0.1, 'CurrentInstanceCount': 123, 'DesiredInstanceCount': 123}]", - "DataCaptureConfig": "{'EnableCapture': True, 'CaptureStatus': 'Started', 'CurrentSamplingPercentage': 123, 'DestinationS3Uri': 'string', 'KmsKeyId': 'string'}", - "EndpointStatus": "InService", - "FailureReason": "string", - "LastModifiedTime": "2015-01-01 00:00:00+00:00", - 
"LastDeploymentConfig": "{'BlueGreenUpdatePolicy': {'TrafficRoutingConfiguration': {'Type': 'ALL_AT_ONCE', 'WaitIntervalInSeconds': 123, 'CanarySize': {'Type': 'INSTANCE_COUNT', 'Value': 123}}, 'TerminationWaitInSeconds': 123, 'MaximumExecutionTimeoutInSeconds': 600}, 'AutoRollbackConfiguration': {'Alarms': [{'AlarmName': 'string'}]}}" - }, - "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/endpoints/the-first-endpoint", - "description": null, - "createdAt": 1420070400000, - "version": null, - "status": "IN_SERVICE" - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot": { + "urn": "urn:li:dataFlow:(sagemaker,transform:a-transform-job,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataFlowInfo": { + "customProperties": {}, + "name": "a-transform-job" + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLModelDeploymentSnapshot": { - "urn": "urn:li:mlModelDeployment:(urn:li:dataPlatform:sagemaker,the-second-endpoint,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLModelDeploymentProperties": { - "customProperties": { - "EndpointArn": "arn:aws:sagemaker:us-west-2:123412341234:endpoint/the-second-endpoint", - "EndpointConfigName": "string", - "ProductionVariants": "[{'VariantName': 'string', 'DeployedImages': [{'SpecifiedImage': 'string', 'ResolvedImage': 'string', 'ResolutionTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}], 'CurrentWeight': 0.1, 'DesiredWeight': 0.1, 'CurrentInstanceCount': 123, 'DesiredInstanceCount': 123}]", - "DataCaptureConfig": "{'EnableCapture': True, 'CaptureStatus': 'Started', 'CurrentSamplingPercentage': 123, 'DestinationS3Uri': 'string', 'KmsKeyId': 'string'}", - "EndpointStatus": "Creating", - "FailureReason": "string", - "LastModifiedTime": "2015-01-01 00:00:00+00:00", - 
"LastDeploymentConfig": "{'BlueGreenUpdatePolicy': {'TrafficRoutingConfiguration': {'Type': 'ALL_AT_ONCE', 'WaitIntervalInSeconds': 123, 'CanarySize': {'Type': 'INSTANCE_COUNT', 'Value': 123}}, 'TerminationWaitInSeconds': 123, 'MaximumExecutionTimeoutInSeconds': 600}, 'AutoRollbackConfiguration': {'Alarms': [{'AlarmName': 'string'}]}}" - }, - "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/endpoints/the-second-endpoint", - "description": null, - "createdAt": 1420070400000, - "version": null, - "status": "CREATING" - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + "com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot": { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,transform:a-transform-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:transform-job/a-transform-job)", + "aspects": [ + { + "com.linkedin.pegasus2avro.datajob.DataJobInfo": { + "customProperties": { + "TransformJobName": "a-transform-job", + "TransformJobArn": "arn:aws:sagemaker:us-west-2:123412341234:transform-job/a-transform-job", + "TransformJobStatus": "InProgress", + "FailureReason": "string", + "ModelName": "the-second-model", + "MaxConcurrentTransforms": "123", + "ModelClientConfig": "{'InvocationsTimeoutInSeconds': 123, 'InvocationsMaxRetries': 123}", + "MaxPayloadInMB": "123", + "BatchStrategy": "MultiRecord", + "Environment": "{'string': 'string'}", + "TransformInput": "{'DataSource': {'S3DataSource': {'S3DataType': 'ManifestFile', 'S3Uri': 's3://transform-job/input-data-source.tar.gz'}}, 'ContentType': 'string', 'CompressionType': 'None', 'SplitType': 'None'}", + "TransformOutput": "{'S3OutputPath': 's3://transform-job/output.tar.gz', 'Accept': 'string', 'AssembleWith': 'None', 'KmsKeyId': 'string'}", + "TransformResources": "{'InstanceType': 'ml.m4.xlarge', 'InstanceCount': 123, 'VolumeKmsKeyId': 'string'}", + "CreationTime": "2015-01-01 00:00:00+00:00", + "TransformStartTime": 
"2015-01-01 00:00:00+00:00", + "TransformEndTime": "2015-01-01 00:00:00+00:00", + "LabelingJobArn": "arn:aws:sagemaker:us-west-2:123412341234:labeling-job/a-labeling-job", + "AutoMLJobArn": "arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job", + "DataProcessing": "{'InputFilter': 'string', 'OutputFilter': 'string', 'JoinSource': 'Input'}", + "ExperimentConfig": "{'ExperimentName': 'string', 'TrialName': 'string', 'TrialComponentDisplayName': 'string'}", + "jobType": "transform" + }, + "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/transform-jobs/a-transform-job", + "name": "a-transform-job", + "type": { + "string": "SAGEMAKER" + }, + "status": "IN_PROGRESS" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/transform" + ] + } + }, + { + "com.linkedin.pegasus2avro.datajob.DataJobInputOutput": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,transform-job/input-data-source.tar_gz,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:s3,transform-job/output.tar_gz,PROD)" + ], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,auto_ml:an-auto-ml-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job)", + "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,labeling:a-labeling-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:labeling-job/a-labeling-job)" + ] + } + } + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLModelGroupSnapshot": { - "urn": "urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,a-model-package-group,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLModelGroupProperties": { - "customProperties": { - "ModelPackageGroupArn": "arn:aws:sagemaker:us-west-2:123412341234:model-package-group/a-model-package-group", - "ModelPackageGroupDescription": "Just a model package group.", - "CreatedBy": "{'UserProfileArn': 
'arn:aws:sagemaker:us-west-2:123412341234:user-profile/some-domain/some-user', 'UserProfileName': 'some-user', 'DomainId': 'some-domain'}", - "ModelPackageGroupStatus": "Completed" - }, - "description": "Just a model package group.", - "createdAt": 1420070400000, - "version": null - } - }, - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:some-user", - "type": "DATAOWNER", - "source": null + "com.linkedin.pegasus2avro.metadata.snapshot.MLModelDeploymentSnapshot": { + "urn": "urn:li:mlModelDeployment:(urn:li:dataPlatform:sagemaker,the-first-endpoint,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.ml.metadata.MLModelDeploymentProperties": { + "customProperties": { + "EndpointArn": "arn:aws:sagemaker:us-west-2:123412341234:endpoint/the-first-endpoint", + "EndpointConfigName": "string", + "ProductionVariants": "[{'VariantName': 'string', 'DeployedImages': [{'SpecifiedImage': 'string', 'ResolvedImage': 'string', 'ResolutionTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}], 'CurrentWeight': 0.1, 'DesiredWeight': 0.1, 'CurrentInstanceCount': 123, 'DesiredInstanceCount': 123}]", + "DataCaptureConfig": "{'EnableCapture': True, 'CaptureStatus': 'Started', 'CurrentSamplingPercentage': 123, 'DestinationS3Uri': 'string', 'KmsKeyId': 'string'}", + "EndpointStatus": "InService", + "FailureReason": "string", + "LastModifiedTime": "2015-01-01 00:00:00+00:00", + "LastDeploymentConfig": "{'BlueGreenUpdatePolicy': {'TrafficRoutingConfiguration': {'Type': 'ALL_AT_ONCE', 'WaitIntervalInSeconds': 123, 'CanarySize': {'Type': 'INSTANCE_COUNT', 'Value': 123}}, 'TerminationWaitInSeconds': 123, 'MaximumExecutionTimeoutInSeconds': 600}, 'AutoRollbackConfiguration': {'Alarms': [{'AlarmName': 'string'}]}}" + }, + "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/endpoints/the-first-endpoint", + "createdAt": 1420070400000, + "status": "IN_SERVICE" + } } - ], - "lastModified": 
{ - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - } - } - }, - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/sagemaker" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + ] + } + } +}, +{ "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLModelSnapshot": { - "urn": "urn:li:mlModel:(urn:li:dataPlatform:sagemaker,the-first-model,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLModelProperties": { - "customProperties": { - "PrimaryContainer": "{'ContainerHostname': 'string', 'Image': '123412341234.dkr.ecr.us-west-2.amazonaws.com/the-first-model-image', 'ImageConfig': {'RepositoryAccessMode': 'Platform', 'RepositoryAuthConfig': {'RepositoryCredentialsProviderArn': 'string'}}, 'Mode': 'SingleModel', 'ModelDataUrl': 's3://the-first-model-data-url/data.tar.gz', 'Environment': {'string': 'string'}, 'ModelPackageName': 'string', 'MultiModelConfig': {'ModelCacheSetting': 'Enabled'}}", - "Containers": "[{'ContainerHostname': 'string', 'Image': 'string', 'ImageConfig': {'RepositoryAccessMode': 'Platform', 'RepositoryAuthConfig': {'RepositoryCredentialsProviderArn': 'string'}}, 'Mode': 'SingleModel', 'ModelDataUrl': 's3://training-job-2/model-artifact.tar.gz', 'Environment': {'string': 'string'}, 'ModelPackageName': 'string', 'MultiModelConfig': {'ModelCacheSetting': 'Enabled'}}]", - "InferenceExecutionConfig": "{'Mode': 'Serial'}", - "ExecutionRoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMaker-ExecutionRole-20210614T104201", - "VpcConfig": "{'SecurityGroupIds': ['string'], 'Subnets': ['string']}", - "ModelArn": "arn:aws:sagemaker:us-west-2:123412341234:model/the-first-model", - "EnableNetworkIsolation": "True" - }, - "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/models/the-first-model", - "description": null, - "date": 1420070400000, - "version": null, - "type": 
null, - "hyperParameters": null, - "hyperParams": [ - { - "name": "parameter-1", - "description": null, - "value": "some-value", - "createdAt": null - }, + "com.linkedin.pegasus2avro.metadata.snapshot.MLModelDeploymentSnapshot": { + "urn": "urn:li:mlModelDeployment:(urn:li:dataPlatform:sagemaker,the-second-endpoint,PROD)", + "aspects": [ { - "name": "parameter-2", - "description": null, - "value": "another-value", - "createdAt": null + "com.linkedin.pegasus2avro.ml.metadata.MLModelDeploymentProperties": { + "customProperties": { + "EndpointArn": "arn:aws:sagemaker:us-west-2:123412341234:endpoint/the-second-endpoint", + "EndpointConfigName": "string", + "ProductionVariants": "[{'VariantName': 'string', 'DeployedImages': [{'SpecifiedImage': 'string', 'ResolvedImage': 'string', 'ResolutionTime': datetime.datetime(2015, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)}], 'CurrentWeight': 0.1, 'DesiredWeight': 0.1, 'CurrentInstanceCount': 123, 'DesiredInstanceCount': 123}]", + "DataCaptureConfig": "{'EnableCapture': True, 'CaptureStatus': 'Started', 'CurrentSamplingPercentage': 123, 'DestinationS3Uri': 'string', 'KmsKeyId': 'string'}", + "EndpointStatus": "Creating", + "FailureReason": "string", + "LastModifiedTime": "2015-01-01 00:00:00+00:00", + "LastDeploymentConfig": "{'BlueGreenUpdatePolicy': {'TrafficRoutingConfiguration': {'Type': 'ALL_AT_ONCE', 'WaitIntervalInSeconds': 123, 'CanarySize': {'Type': 'INSTANCE_COUNT', 'Value': 123}}, 'TerminationWaitInSeconds': 123, 'MaximumExecutionTimeoutInSeconds': 600}, 'AutoRollbackConfiguration': {'Alarms': [{'AlarmName': 'string'}]}}" + }, + "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/endpoints/the-second-endpoint", + "createdAt": 1420070400000, + "status": "CREATING" + } } - ], - "trainingMetrics": [ + ] + } + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.MLModelGroupSnapshot": { + "urn": 
"urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,a-model-package-group,PROD)", + "aspects": [ { - "name": "another-metric", - "description": null, - "value": "1.0", - "createdAt": null + "com.linkedin.pegasus2avro.ml.metadata.MLModelGroupProperties": { + "customProperties": { + "ModelPackageGroupArn": "arn:aws:sagemaker:us-west-2:123412341234:model-package-group/a-model-package-group", + "ModelPackageGroupDescription": "Just a model package group.", + "CreatedBy": "{'UserProfileArn': 'arn:aws:sagemaker:us-west-2:123412341234:user-profile/some-domain/some-user', 'UserProfileName': 'some-user', 'DomainId': 'some-domain'}", + "ModelPackageGroupStatus": "Completed" + }, + "description": "Just a model package group.", + "createdAt": 1420070400000 + } }, { - "name": "some-metric", - "description": null, - "value": "1.0", - "createdAt": null + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:some-user", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/sagemaker" + ] + } } - ], - "onlineMetrics": null, - "mlFeatures": null, - "tags": [], - "deployments": [ - "urn:li:mlModelDeployment:(urn:li:dataPlatform:sagemaker,arn:aws:sagemaker:us-west-2:123412341234:endpoint/the-first-endpoint,PROD)" - ], - "trainingJobs": [ - "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,training:a-training-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job)" - ], - "downstreamJobs": [], - "groups": [ - "urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,a-model-package-group,PROD)" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/sagemaker/a-model-package-group" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - }, - { - "auditHeader": null, + ] + } + } +}, +{ "proposedSnapshot": { - 
"com.linkedin.pegasus2avro.metadata.snapshot.MLModelSnapshot": { - "urn": "urn:li:mlModel:(urn:li:dataPlatform:sagemaker,the-second-model,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLModelProperties": { - "customProperties": { - "PrimaryContainer": "{'ContainerHostname': 'string', 'Image': '123412341234.dkr.ecr.us-west-2.amazonaws.com/the-second-model-image', 'ImageConfig': {'RepositoryAccessMode': 'Platform', 'RepositoryAuthConfig': {'RepositoryCredentialsProviderArn': 'string'}}, 'Mode': 'MultiModel', 'ModelDataUrl': 's3://the-second-model-data-url/data.tar.gz', 'Environment': {'string': 'string'}, 'ModelPackageName': 'string', 'MultiModelConfig': {'ModelCacheSetting': 'Disabled'}}", - "Containers": "[{'ContainerHostname': 'string', 'Image': 'string', 'ImageConfig': {'RepositoryAccessMode': 'Vpc', 'RepositoryAuthConfig': {'RepositoryCredentialsProviderArn': 'string'}}, 'Mode': 'SingleModel', 'ModelDataUrl': 's3://the-first-model-data-url/data.tar.gz', 'Environment': {'string': 'string'}, 'ModelPackageName': 'string', 'MultiModelConfig': {'ModelCacheSetting': 'Disabled'}}]", - "InferenceExecutionConfig": "{'Mode': 'Serial'}", - "ExecutionRoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMaker-ExecutionRole-20210614T104201", - "VpcConfig": "{'SecurityGroupIds': ['string'], 'Subnets': ['string']}", - "ModelArn": "arn:aws:sagemaker:us-west-2:123412341234:model/the-second-model", - "EnableNetworkIsolation": "False" - }, - "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/models/the-second-model", - "description": null, - "date": 1420070400000, - "version": null, - "type": null, - "hyperParameters": null, - "hyperParams": [ - { - "name": "parameter-1", - "description": null, - "value": "some-value", - "createdAt": null + "com.linkedin.pegasus2avro.metadata.snapshot.MLModelSnapshot": { + "urn": "urn:li:mlModel:(urn:li:dataPlatform:sagemaker,the-first-model,PROD)", + "aspects": [ + { + 
"com.linkedin.pegasus2avro.ml.metadata.MLModelProperties": { + "customProperties": { + "PrimaryContainer": "{'ContainerHostname': 'string', 'Image': '123412341234.dkr.ecr.us-west-2.amazonaws.com/the-first-model-image', 'ImageConfig': {'RepositoryAccessMode': 'Platform', 'RepositoryAuthConfig': {'RepositoryCredentialsProviderArn': 'string'}}, 'Mode': 'SingleModel', 'ModelDataUrl': 's3://the-first-model-data-url/data.tar.gz', 'Environment': {'string': 'string'}, 'ModelPackageName': 'string', 'MultiModelConfig': {'ModelCacheSetting': 'Enabled'}}", + "Containers": "[{'ContainerHostname': 'string', 'Image': 'string', 'ImageConfig': {'RepositoryAccessMode': 'Platform', 'RepositoryAuthConfig': {'RepositoryCredentialsProviderArn': 'string'}}, 'Mode': 'SingleModel', 'ModelDataUrl': 's3://training-job-2/model-artifact.tar.gz', 'Environment': {'string': 'string'}, 'ModelPackageName': 'string', 'MultiModelConfig': {'ModelCacheSetting': 'Enabled'}}]", + "InferenceExecutionConfig": "{'Mode': 'Serial'}", + "ExecutionRoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMaker-ExecutionRole-20210614T104201", + "VpcConfig": "{'SecurityGroupIds': ['string'], 'Subnets': ['string']}", + "ModelArn": "arn:aws:sagemaker:us-west-2:123412341234:model/the-first-model", + "EnableNetworkIsolation": "True" + }, + "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/models/the-first-model", + "date": 1420070400000, + "hyperParams": [ + { + "name": "parameter-1", + "value": "some-value" + }, + { + "name": "parameter-2", + "value": "another-value" + } + ], + "trainingMetrics": [ + { + "name": "another-metric", + "value": "1.0" + }, + { + "name": "some-metric", + "value": "1.0" + } + ], + "tags": [], + "deployments": [ + "urn:li:mlModelDeployment:(urn:li:dataPlatform:sagemaker,arn:aws:sagemaker:us-west-2:123412341234:endpoint/the-first-endpoint,PROD)" + ], + "trainingJobs": [ + 
"urn:li:dataJob:(urn:li:dataFlow:(sagemaker,training:a-training-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job)" + ], + "downstreamJobs": [], + "groups": [ + "urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,a-model-package-group,PROD)" + ] + } }, { - "name": "parameter-2", - "description": null, - "value": "another-value", - "createdAt": null + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/sagemaker/a-model-package-group" + ] + } } - ], - "trainingMetrics": [ + ] + } + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.MLModelSnapshot": { + "urn": "urn:li:mlModel:(urn:li:dataPlatform:sagemaker,the-second-model,PROD)", + "aspects": [ { - "name": "another-metric", - "description": null, - "value": "1.0", - "createdAt": null + "com.linkedin.pegasus2avro.ml.metadata.MLModelProperties": { + "customProperties": { + "PrimaryContainer": "{'ContainerHostname': 'string', 'Image': '123412341234.dkr.ecr.us-west-2.amazonaws.com/the-second-model-image', 'ImageConfig': {'RepositoryAccessMode': 'Platform', 'RepositoryAuthConfig': {'RepositoryCredentialsProviderArn': 'string'}}, 'Mode': 'MultiModel', 'ModelDataUrl': 's3://the-second-model-data-url/data.tar.gz', 'Environment': {'string': 'string'}, 'ModelPackageName': 'string', 'MultiModelConfig': {'ModelCacheSetting': 'Disabled'}}", + "Containers": "[{'ContainerHostname': 'string', 'Image': 'string', 'ImageConfig': {'RepositoryAccessMode': 'Vpc', 'RepositoryAuthConfig': {'RepositoryCredentialsProviderArn': 'string'}}, 'Mode': 'SingleModel', 'ModelDataUrl': 's3://the-first-model-data-url/data.tar.gz', 'Environment': {'string': 'string'}, 'ModelPackageName': 'string', 'MultiModelConfig': {'ModelCacheSetting': 'Disabled'}}]", + "InferenceExecutionConfig": "{'Mode': 'Serial'}", + "ExecutionRoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMaker-ExecutionRole-20210614T104201", + "VpcConfig": "{'SecurityGroupIds': ['string'], 'Subnets': 
['string']}", + "ModelArn": "arn:aws:sagemaker:us-west-2:123412341234:model/the-second-model", + "EnableNetworkIsolation": "False" + }, + "externalUrl": "https://us-west-2.console.aws.amazon.com/sagemaker/home?region=us-west-2#/models/the-second-model", + "date": 1420070400000, + "hyperParams": [ + { + "name": "parameter-1", + "value": "some-value" + }, + { + "name": "parameter-2", + "value": "another-value" + } + ], + "trainingMetrics": [ + { + "name": "another-metric", + "value": "1.0" + }, + { + "name": "some-metric", + "value": "1.0" + } + ], + "tags": [], + "deployments": [ + "urn:li:mlModelDeployment:(urn:li:dataPlatform:sagemaker,arn:aws:sagemaker:us-west-2:123412341234:endpoint/the-second-endpoint,PROD)" + ], + "trainingJobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,training:a-training-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job)" + ], + "downstreamJobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,edge_packaging:an-edge-packaging-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:edge-packaging-job/an-edge-packaging-job)", + "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,transform:a-transform-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:transform-job/a-transform-job)" + ], + "groups": [ + "urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,a-model-package-group,PROD)" + ] + } }, { - "name": "some-metric", - "description": null, - "value": "1.0", - "createdAt": null + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/sagemaker/a-model-package-group" + ] + } } - ], - "onlineMetrics": null, - "mlFeatures": null, - "tags": [], - "deployments": [ - "urn:li:mlModelDeployment:(urn:li:dataPlatform:sagemaker,arn:aws:sagemaker:us-west-2:123412341234:endpoint/the-second-endpoint,PROD)" - ], - "trainingJobs": [ - "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,training:a-training-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job)" - ], - "downstreamJobs": [ - 
"urn:li:dataJob:(urn:li:dataFlow:(sagemaker,edge_packaging:an-edge-packaging-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:edge-packaging-job/an-edge-packaging-job)", - "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,transform:a-transform-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:transform-job/a-transform-job)" - ], - "groups": [ - "urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,a-model-package-group,PROD)" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/sagemaker/a-model-package-group" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": null - } -] + ] + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(sagemaker,auto_ml:an-auto-ml-job,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(sagemaker,compilation:a-compilation-job,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(sagemaker,edge_packaging:an-edge-packaging-job,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(sagemaker,hyper_parameter_tuning:a-hyper-parameter-tuning-job,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(sagemaker,labeling:a-labeling-job,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(sagemaker,processing:a-processing-job,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + 
"entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(sagemaker,training:a-training-job,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(sagemaker,transform:a-transform-job,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,auto_ml:an-auto-ml-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:auto-ml-job/an-auto-ml-job)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,compilation:a-compilation-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:compilation-job/a-compilation-job)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,edge_packaging:an-edge-packaging-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:edge-packaging-job/an-edge-packaging-job)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,hyper_parameter_tuning:a-hyper-parameter-tuning-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:hyper-parameter-tuning-job/a-hyper-parameter-tuning-job)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,labeling:a-labeling-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:labeling-job/a-labeling-job)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + 
"removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,processing:a-processing-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:processing-job/a-processing-job)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,training:a-training-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:training-job/a-training-job)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(sagemaker,transform:a-transform-job,PROD),arn:aws:sagemaker:us-west-2:123412341234:transform-job/a-transform-job)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,auto-ml-job-input-bucket/file_txt,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,auto-ml-job-output-bucket/file_txt,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,compilation-job-bucket/input-config.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,compilation-job-bucket/output-config.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,edge-packaging-bucket/model-artifact.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,edge-packaging-bucket/output-config.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/category-config.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/data-source.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/output-config.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,labeling-job/output-dataset.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,processing-job/input-data.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/checkpoint-config.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/debug-hook-config.tar_gz,PROD)", + 
"changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/debug-rule-config.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/input-dataset.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/output-data.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/profiler-config.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/profiler-rule-config.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,training-job/tensorboard-output-config.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,transform-job/input-data-source.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,transform-job/output.tar_gz,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": 
false + } + } +}, +{ + "entityType": "mlFeature", + "entityUrn": "urn:li:mlFeature:(test,feature_2)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlFeature", + "entityUrn": "urn:li:mlFeature:(test,feature_3)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlFeature", + "entityUrn": "urn:li:mlFeature:(test-1,height)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlFeature", + "entityUrn": "urn:li:mlFeature:(test-1,name)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlFeature", + "entityUrn": "urn:li:mlFeature:(test-1,time)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlFeature", + "entityUrn": "urn:li:mlFeature:(test-2,some-feature-1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlFeature", + "entityUrn": "urn:li:mlFeature:(test-2,some-feature-3)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlFeatureTable", + "entityUrn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:sagemaker,test)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlFeatureTable", + "entityUrn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:sagemaker,test-1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlFeatureTable", + "entityUrn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:sagemaker,test-2)", + "changeType": "UPSERT", + "aspectName": "status", + 
"aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlModel", + "entityUrn": "urn:li:mlModel:(urn:li:dataPlatform:sagemaker,the-first-model,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlModel", + "entityUrn": "urn:li:mlModel:(urn:li:dataPlatform:sagemaker,the-second-model,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlModelDeployment", + "entityUrn": "urn:li:mlModelDeployment:(urn:li:dataPlatform:sagemaker,the-first-endpoint,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlModelDeployment", + "entityUrn": "urn:li:mlModelDeployment:(urn:li:dataPlatform:sagemaker,the-second-endpoint,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlModelGroup", + "entityUrn": "urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,a-model-package-group,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlPrimaryKey", + "entityUrn": "urn:li:mlPrimaryKey:(test,feature_1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlPrimaryKey", + "entityUrn": "urn:li:mlPrimaryKey:(test-1,id)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "mlPrimaryKey", + "entityUrn": "urn:li:mlPrimaryKey:(test-2,some-feature-2)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +} +] \ No newline at end of file diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java 
b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index 911ab993e5789..e816700e583c7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -277,7 +277,7 @@ public SearchResult search(@Nonnull String entity, @Nonnull String input, @Nullable SearchFlags searchFlags) throws RemoteInvocationException { - return ValidationUtils.validateSearchResult(_entitySearchService.search(entity, input, newFilter(requestFilters), + return ValidationUtils.validateSearchResult(_entitySearchService.search(List.of(entity), input, newFilter(requestFilters), null, start, count, searchFlags), _entityService); } @@ -329,7 +329,7 @@ public SearchResult search( @Nullable SearchFlags searchFlags) throws RemoteInvocationException { return ValidationUtils.validateSearchResult( - _entitySearchService.search(entity, input, filter, sortCriterion, start, count, searchFlags), _entityService); + _entitySearchService.search(List.of(entity), input, filter, sortCriterion, start, count, searchFlags), _entityService); } @Nonnull diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java index f3c0d14bddac3..a14ea8d36b0c1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java @@ -44,7 +44,7 @@ public Map docCountPerEntity(@Nonnull List entityNames) { * Gets a list of documents that match given search request. The results are aggregated and filters are applied to the * search hits and not the aggregation results. 
* - * @param entityName name of the entity + * @param entityNames names of the entity * @param input the search input text * @param postFilters the request map with fields and values as filters to be applied to search hits * @param sortCriterion {@link SortCriterion} to be applied to search results @@ -54,10 +54,10 @@ public Map docCountPerEntity(@Nonnull List entityNames) { * @return a {@link SearchResult} that contains a list of matched documents and related search result metadata */ @Nonnull - public SearchResult search(@Nonnull String entityName, @Nonnull String input, @Nullable Filter postFilters, + public SearchResult search(@Nonnull List entityNames, @Nonnull String input, @Nullable Filter postFilters, @Nullable SortCriterion sortCriterion, int from, int size, @Nullable SearchFlags searchFlags) { SearchResult result = - _cachingEntitySearchService.search(entityName, input, postFilters, sortCriterion, from, size, searchFlags, null); + _cachingEntitySearchService.search(entityNames, input, postFilters, sortCriterion, from, size, searchFlags, null); try { return result.copy().setEntities(new SearchEntityArray(_searchRanker.rank(result.getEntities()))); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/aggregator/AllEntitiesSearchAggregator.java b/metadata-io/src/main/java/com/linkedin/metadata/search/aggregator/AllEntitiesSearchAggregator.java index ee93edaf2480c..1af94141366e1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/aggregator/AllEntitiesSearchAggregator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/aggregator/AllEntitiesSearchAggregator.java @@ -171,7 +171,7 @@ private Map getSearchResultsForEachEntity(@Nonnull List new Pair<>(entity, - _cachingEntitySearchService.search(entity, input, postFilters, sortCriterion, queryFrom, querySize, searchFlags, facets))) + _cachingEntitySearchService.search(List.of(entity), input, postFilters, sortCriterion, queryFrom, querySize, searchFlags, facets))) 
.stream() .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java index 804ab3303f6b0..f698e28c0be6d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java @@ -53,7 +53,7 @@ public class CachingEntitySearchService { * @return a {@link SearchResult} containing the requested batch of search results */ public SearchResult search( - @Nonnull String entityName, + @Nonnull List entityNames, @Nonnull String query, @Nullable Filter filters, @Nullable SortCriterion sortCriterion, @@ -61,7 +61,7 @@ public SearchResult search( int size, @Nullable SearchFlags flags, @Nullable List facets) { - return getCachedSearchResults(entityName, query, filters, sortCriterion, from, size, flags, facets); + return getCachedSearchResults(entityNames, query, filters, sortCriterion, from, size, flags, facets); } /** @@ -141,7 +141,7 @@ public ScrollResult scroll( * This lets us have batches that return a variable number of results (we have no idea which batch the "from" "size" page corresponds to) */ public SearchResult getCachedSearchResults( - @Nonnull String entityName, + @Nonnull List entityNames, @Nonnull String query, @Nullable Filter filters, @Nullable SortCriterion sortCriterion, @@ -152,10 +152,10 @@ public SearchResult getCachedSearchResults( return new CacheableSearcher<>( cacheManager.getCache(ENTITY_SEARCH_SERVICE_SEARCH_CACHE_NAME), batchSize, - querySize -> getRawSearchResults(entityName, query, filters, sortCriterion, querySize.getFrom(), + querySize -> getRawSearchResults(entityNames, query, filters, sortCriterion, querySize.getFrom(), querySize.getSize(), flags, facets), - querySize -> Quintet.with(entityName, query, 
filters != null ? toJsonString(filters) : null, - sortCriterion != null ? toJsonString(sortCriterion) : null, querySize), flags, enableCache).getSearchResults(from, size); + querySize -> Sextet.with(entityNames, query, filters != null ? toJsonString(filters) : null, + sortCriterion != null ? toJsonString(sortCriterion) : null, facets, querySize), flags, enableCache).getSearchResults(from, size); } @@ -272,7 +272,7 @@ public ScrollResult getCachedScrollResults( * Executes the expensive search query using the {@link EntitySearchService} */ private SearchResult getRawSearchResults( - final String entityName, + final List entityNames, final String input, final Filter filters, final SortCriterion sortCriterion, @@ -280,7 +280,7 @@ private SearchResult getRawSearchResults( final int count, @Nullable final SearchFlags searchFlags, @Nullable final List facets) { - return entitySearchService.search(entityName, input, filters, sortCriterion, start, count, searchFlags, facets); + return entitySearchService.search(entityNames, input, filters, sortCriterion, start, count, searchFlags, facets); } /** diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index 0680e6d74edf3..ce7b44c715d6b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -106,18 +106,18 @@ public void appendRunId(@Nonnull String entityName, @Nonnull Urn urn, @Nullable @Nonnull @Override - public SearchResult search(@Nonnull String entityName, @Nonnull String input, @Nullable Filter postFilters, + public SearchResult search(@Nonnull List entityNames, @Nonnull String input, @Nullable Filter postFilters, @Nullable SortCriterion sortCriterion, int from, int size, @Nullable SearchFlags searchFlags) { - return 
search(entityName, input, postFilters, sortCriterion, from, size, searchFlags, null); + return search(entityNames, input, postFilters, sortCriterion, from, size, searchFlags, null); } @Nonnull - public SearchResult search(@Nonnull String entityName, @Nonnull String input, @Nullable Filter postFilters, + public SearchResult search(@Nonnull List entityNames, @Nonnull String input, @Nullable Filter postFilters, @Nullable SortCriterion sortCriterion, int from, int size, @Nullable SearchFlags searchFlags, @Nullable List facets) { log.debug(String.format( "Searching FullText Search documents entityName: %s, input: %s, postFilters: %s, sortCriterion: %s, from: %s, size: %s", - entityName, input, postFilters, sortCriterion, from, size)); - return esSearchDAO.search(entityName, input, postFilters, sortCriterion, from, size, searchFlags, facets); + entityNames, input, postFilters, sortCriterion, from, size)); + return esSearchDAO.search(entityNames, input, postFilters, sortCriterion, from, size, searchFlags, facets); } @Nonnull diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index 51e08763cd7c8..57e8967c83985 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -79,7 +79,7 @@ public long docCount(@Nonnull String entityName) { @Nonnull @WithSpan - private SearchResult executeAndExtract(@Nonnull EntitySpec entitySpec, @Nonnull SearchRequest searchRequest, + private SearchResult executeAndExtract(@Nonnull List entitySpec, @Nonnull SearchRequest searchRequest, @Nullable Filter filter, int from, int size) { long id = System.currentTimeMillis(); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "executeAndExtract_search").time()) { @@ -181,20 +181,22 @@ private 
ScrollResult executeAndExtract(@Nonnull List entitySpecs, @N * @return a {@link SearchResult} that contains a list of matched documents and related search result metadata */ @Nonnull - public SearchResult search(@Nonnull String entityName, @Nonnull String input, @Nullable Filter postFilters, + public SearchResult search(@Nonnull List entityNames, @Nonnull String input, @Nullable Filter postFilters, @Nullable SortCriterion sortCriterion, int from, int size, @Nullable SearchFlags searchFlags, @Nullable List facets) { final String finalInput = input.isEmpty() ? "*" : input; Timer.Context searchRequestTimer = MetricUtils.timer(this.getClass(), "searchRequest").time(); - EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); + List entitySpecs = entityNames.stream().map(entityRegistry::getEntitySpec).collect(Collectors.toList()); Filter transformedFilters = transformFilterForEntities(postFilters, indexConvention); // Step 1: construct the query final SearchRequest searchRequest = SearchRequestHandler - .getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) + .getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) .getSearchRequest(finalInput, transformedFilters, sortCriterion, from, size, searchFlags, facets); - searchRequest.indices(indexConvention.getIndexName(entitySpec)); + searchRequest.indices(entityNames.stream() + .map(indexConvention::getEntityIndexName) + .toArray(String[]::new)); searchRequestTimer.stop(); // Step 2: execute the query and extract results, validated against document model as well - return executeAndExtract(entitySpec, searchRequest, transformedFilters, from, size); + return executeAndExtract(entitySpecs, searchRequest, transformedFilters, from, size); } /** @@ -217,7 +219,7 @@ public SearchResult filter(@Nonnull String entityName, @Nullable Filter filters, .getFilterRequest(transformedFilters, sortCriterion, from, size); searchRequest.indices(indexConvention.getIndexName(entitySpec)); - return 
executeAndExtract(entitySpec, searchRequest, transformedFilters, from, size); + return executeAndExtract(List.of(entitySpec), searchRequest, transformedFilters, from, size); } /** diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/BrowsePathV2Utils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/BrowsePathV2Utils.java index 1487ac58d6a0a..a7f5ea7a51e29 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/BrowsePathV2Utils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/BrowsePathV2Utils.java @@ -24,6 +24,7 @@ import java.util.Collections; import java.util.List; import java.util.regex.Pattern; +import java.util.stream.Collectors; import static com.linkedin.metadata.Constants.CONTAINER_ASPECT_NAME; @@ -140,7 +141,9 @@ private static BrowsePathEntryArray getContainerPathEntries(@Nonnull final Urn e private static BrowsePathEntryArray getDefaultDatasetPathEntries(@Nonnull final String datasetName, @Nonnull final Character delimiter) { BrowsePathEntryArray browsePathEntries = new BrowsePathEntryArray(); if (datasetName.contains(delimiter.toString())) { - final List datasetNamePathParts = Arrays.asList(datasetName.split(Pattern.quote(delimiter.toString()))); + final List datasetNamePathParts = Arrays.stream(datasetName.split(Pattern.quote(delimiter.toString()))) + .filter((name) -> !name.isEmpty()) + .collect(Collectors.toList()); // Omit the name from the path. 
datasetNamePathParts.subList(0, datasetNamePathParts.size() - 1).forEach((part -> { browsePathEntries.add(createBrowsePathEntry(part, null)); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java index c1297866edcc4..79496888650e1 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java @@ -68,15 +68,24 @@ private ESTestUtils() { .collect(Collectors.toList()); } - public static SearchResult search(SearchService searchService, String query) { - return search(searchService, query, null); + public static SearchResult searchAcrossEntities(SearchService searchService, String query) { + return searchAcrossEntities(searchService, query, null); } - public static SearchResult search(SearchService searchService, String query, @Nullable List facets) { + public static SearchResult searchAcrossEntities(SearchService searchService, String query, @Nullable List facets) { return searchService.searchAcrossEntities(SEARCHABLE_ENTITIES, query, null, null, 0, 100, new SearchFlags().setFulltext(true).setSkipCache(true), facets); } + public static SearchResult search(SearchService searchService, String query) { + return search(searchService, SEARCHABLE_ENTITIES, query); + } + + public static SearchResult search(SearchService searchService, List entities, String query) { + return searchService.search(entities, query, null, null, 0, 100, + new SearchFlags().setFulltext(true).setSkipCache(true)); + } + public static ScrollResult scroll(SearchService searchService, String query, int batchSize, @Nullable String scrollId) { return searchService.scrollAcrossEntities(SEARCHABLE_ENTITIES, query, null, null, scrollId, "3m", batchSize, new SearchFlags().setFulltext(true).setSkipCache(true)); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java 
b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java index 0e8d881b70791..9a6d2dc6fc1fa 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java @@ -23,6 +23,7 @@ import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; +import java.util.List; import org.elasticsearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Import; @@ -93,7 +94,7 @@ private ElasticSearchService buildService() { @Test public void testElasticSearchServiceStructuredQuery() throws Exception { - SearchResult searchResult = _elasticSearchService.search(ENTITY_NAME, "test", null, null, 0, 10, new SearchFlags().setFulltext(false)); + SearchResult searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test", null, null, 0, 10, new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 0); BrowseResult browseResult = _elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 0); @@ -110,10 +111,10 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); syncAfterWrite(_bulkProcessor); - searchResult = _elasticSearchService.search(ENTITY_NAME, "test", null, null, 0, 10, new SearchFlags().setFulltext(false)); + searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test", null, null, 0, 10, new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); 
assertEquals(searchResult.getEntities().get(0).getEntity(), urn); - searchResult = _elasticSearchService.search(ENTITY_NAME, "foreignKey:Node", null, null, 0, 10, new SearchFlags().setFulltext(false)); + searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "foreignKey:Node", null, null, 0, 10, new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn); browseResult = _elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); @@ -135,7 +136,7 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); syncAfterWrite(_bulkProcessor); - searchResult = _elasticSearchService.search(ENTITY_NAME, "test2", null, null, 0, 10, new SearchFlags().setFulltext(false)); + searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test2", null, null, 0, 10, new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn2); browseResult = _elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); @@ -152,7 +153,7 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); syncAfterWrite(_bulkProcessor); - searchResult = _elasticSearchService.search(ENTITY_NAME, "test2", null, null, 0, 10, new SearchFlags().setFulltext(false)); + searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test2", null, null, 0, 10, new SearchFlags().setFulltext(false)); assertEquals(searchResult.getNumEntities().intValue(), 0); browseResult = _elasticSearchService.browse(ENTITY_NAME, "", null, 0, 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 0); @@ -162,7 +163,7 @@ public void 
testElasticSearchServiceStructuredQuery() throws Exception { @Test public void testElasticSearchServiceFulltext() throws Exception { - SearchResult searchResult = _elasticSearchService.search(ENTITY_NAME, "test", null, null, 0, 10, new SearchFlags().setFulltext(true)); + SearchResult searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 0); Urn urn = new TestEntityUrn("test", "urn1", "VALUE_1"); @@ -175,7 +176,7 @@ public void testElasticSearchServiceFulltext() throws Exception { _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); syncAfterWrite(_bulkProcessor); - searchResult = _elasticSearchService.search(ENTITY_NAME, "test", null, null, 0, 10, new SearchFlags().setFulltext(true)); + searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn); @@ -192,7 +193,7 @@ public void testElasticSearchServiceFulltext() throws Exception { _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); syncAfterWrite(_bulkProcessor); - searchResult = _elasticSearchService.search(ENTITY_NAME, "test2", null, null, 0, 10, new SearchFlags().setFulltext(true)); + searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test2", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn2); @@ -203,7 +204,7 @@ public void testElasticSearchServiceFulltext() throws Exception { _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); syncAfterWrite(_bulkProcessor); - searchResult = 
_elasticSearchService.search(ENTITY_NAME, "test2", null, null, 0, 10, new SearchFlags().setFulltext(true)); + searchResult = _elasticSearchService.search(List.of(ENTITY_NAME), "test2", null, null, 0, 10, new SearchFlags().setFulltext(true)); assertEquals(searchResult.getNumEntities().intValue(), 0); assertEquals(_elasticSearchService.docCount(ENTITY_NAME), 0); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java index 817cf5aa5b37b..f4a8400fb005c 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java @@ -54,6 +54,7 @@ import java.util.stream.IntStream; import java.util.stream.Stream; +import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.ESTestUtils.*; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchQueryBuilder.STRUCTURED_QUERY_PREFIX; import static com.linkedin.metadata.utils.SearchUtil.*; @@ -189,10 +190,10 @@ public void testDatasetHasTags() throws IOException { @Test public void testFixtureInitialization() { assertNotNull(searchService); - SearchResult noResult = search(searchService, "no results"); + SearchResult noResult = searchAcrossEntities(searchService, "no results"); assertEquals(0, noResult.getEntities().size()); - final SearchResult result = search(searchService, "test"); + final SearchResult result = searchAcrossEntities(searchService, "test"); Map expectedTypes = Map.of( "dataset", 13, @@ -238,7 +239,7 @@ public void testDataPlatform() { .build(); expected.forEach((key, value) -> { - SearchResult result = search(searchService, key); + SearchResult result = searchAcrossEntities(searchService, key); assertEquals(result.getEntities().size(), 
value.intValue(), String.format("Unexpected data platform `%s` hits.", key)); // max is 100 without pagination }); @@ -254,14 +255,14 @@ public void testUrn() { "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_LIST_feature)", "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)" ).forEach(query -> - assertTrue(search(searchService, query).getEntities().size() >= 1, + assertTrue(searchAcrossEntities(searchService, query).getEntities().size() >= 1, String.format("Unexpected >1 urn result for `%s`", query)) ); } @Test public void testExactTable() { - SearchResult results = search(searchService, "stg_customers"); + SearchResult results = searchAcrossEntities(searchService, "stg_customers"); assertEquals(results.getEntities().size(), 1, "Unexpected single urn result for `stg_customers`"); assertEquals(results.getEntities().get(0).getEntity().toString(), "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_customers,PROD)"); @@ -278,7 +279,7 @@ public void testStemming() { testSets.forEach(testSet -> { Integer expectedResults = null; for (String testQuery : testSet) { - SearchResult results = search(searchService, testQuery); + SearchResult results = searchAcrossEntities(searchService, testQuery); assertTrue(results.hasEntities() && !results.getEntities().isEmpty(), String.format("Expected search results for `%s`", testQuery)); @@ -296,7 +297,7 @@ public void testStemmingOverride() throws IOException { Set testSet = Set.of("customer", "customers"); Set results = testSet.stream() - .map(test -> search(searchService, test)) + .map(test -> searchAcrossEntities(searchService, test)) .collect(Collectors.toSet()); results.forEach(r -> assertTrue(r.hasEntities() && !r.getEntities().isEmpty(), "Expected search results")); @@ -349,7 +350,7 @@ public void testDelimitedSynonym() throws IOException { "customer acquisition cost" ); List resultCounts = testSet.stream().map(q -> { - SearchResult result = search(searchService, q); + 
SearchResult result = searchAcrossEntities(searchService, q); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), "Expected search results for: " + q); return result.getEntities().size(); @@ -382,7 +383,7 @@ public void testUrnSynonym() throws IOException { "big query" ); List results = testSet.stream().map(query -> { - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), "Expected search results for: " + query); return result; }).collect(Collectors.toList()); @@ -621,7 +622,7 @@ public void testSmokeTestQueries() { ); Map results = expectedFulltextMinimums.entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, entry -> search(searchService, entry.getKey()))); + .collect(Collectors.toMap(Map.Entry::getKey, entry -> searchAcrossEntities(searchService, entry.getKey()))); results.forEach((key, value) -> { Integer actualCount = value.getEntities().size(); @@ -691,7 +692,7 @@ public void testUnderscore() throws IOException { @Test public void testFacets() { Set expectedFacets = Set.of("entity", "typeNames", "platform", "origin", "tags"); - SearchResult testResult = search(searchService, "cypress"); + SearchResult testResult = searchAcrossEntities(searchService, "cypress"); expectedFacets.forEach(facet -> { assertTrue(testResult.getMetadata().getAggregations().stream().anyMatch(agg -> agg.getName().equals(facet)), String.format("Failed to find facet `%s` in %s", facet, @@ -703,7 +704,7 @@ public void testFacets() { @Test public void testNestedAggregation() { Set expectedFacets = Set.of("platform"); - SearchResult testResult = search(searchService, "cypress", List.copyOf(expectedFacets)); + SearchResult testResult = searchAcrossEntities(searchService, "cypress", List.copyOf(expectedFacets)); assertEquals(testResult.getMetadata().getAggregations().size(), 1); expectedFacets.forEach(facet -> { 
assertTrue(testResult.getMetadata().getAggregations().stream().anyMatch(agg -> agg.getName().equals(facet)), @@ -713,7 +714,7 @@ public void testNestedAggregation() { }); expectedFacets = Set.of("platform", "typeNames", "_entityType", "entity"); - SearchResult testResult2 = search(searchService, "cypress", List.copyOf(expectedFacets)); + SearchResult testResult2 = searchAcrossEntities(searchService, "cypress", List.copyOf(expectedFacets)); assertEquals(testResult2.getMetadata().getAggregations().size(), 4); expectedFacets.forEach(facet -> { assertTrue(testResult2.getMetadata().getAggregations().stream().anyMatch(agg -> agg.getName().equals(facet)), @@ -723,11 +724,11 @@ public void testNestedAggregation() { }); String singleNestedFacet = String.format("_entityType%sowners", AGGREGATION_SEPARATOR_CHAR); expectedFacets = Set.of(singleNestedFacet); - SearchResult testResultSingleNested = search(searchService, "cypress", List.copyOf(expectedFacets)); + SearchResult testResultSingleNested = searchAcrossEntities(searchService, "cypress", List.copyOf(expectedFacets)); assertEquals(testResultSingleNested.getMetadata().getAggregations().size(), 1); expectedFacets = Set.of("platform", singleNestedFacet, "typeNames", "origin"); - SearchResult testResultNested = search(searchService, "cypress", List.copyOf(expectedFacets)); + SearchResult testResultNested = searchAcrossEntities(searchService, "cypress", List.copyOf(expectedFacets)); assertEquals(testResultNested.getMetadata().getAggregations().size(), 4); expectedFacets.forEach(facet -> { assertTrue(testResultNested.getMetadata().getAggregations().stream().anyMatch(agg -> agg.getName().equals(facet)), @@ -811,6 +812,19 @@ public void testScrollAcrossEntities() throws IOException { assertEquals(totalResults, 8); } + @Test + public void testSearchAcrossMultipleEntities() { + String query = "logging_events"; + SearchResult result = search(searchService, query); + assertEquals((int) result.getNumEntities(), 8); + result = 
search(searchService, List.of(DATASET_ENTITY_NAME, DATA_JOB_ENTITY_NAME), query); + assertEquals((int) result.getNumEntities(), 8); + result = search(searchService, List.of(DATASET_ENTITY_NAME), query); + assertEquals((int) result.getNumEntities(), 4); + result = search(searchService, List.of(DATA_JOB_ENTITY_NAME), query); + assertEquals((int) result.getNumEntities(), 4); + } + @Test public void testQuotedAnalyzer() throws IOException { AnalyzeRequest request = AnalyzeRequest.withIndexAnalyzer( @@ -875,7 +889,7 @@ public void testFragmentUrns() { ); testSet.forEach(query -> { - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected partial urn search results", query)); @@ -941,7 +955,7 @@ public void testPlatformTest() { @Test public void testStructQueryFieldMatch() { String query = STRUCTURED_QUERY_PREFIX + "name: customers"; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -954,7 +968,7 @@ public void testStructQueryFieldMatch() { @Test public void testStructQueryFieldPrefixMatch() { String query = STRUCTURED_QUERY_PREFIX + "name: customers*"; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -967,7 +981,7 @@ public void testStructQueryFieldPrefixMatch() { @Test public void testStructQueryCustomPropertiesKeyPrefix() { String query = STRUCTURED_QUERY_PREFIX + "customProperties: node_type=*"; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); 
assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -980,7 +994,7 @@ public void testStructQueryCustomPropertiesKeyPrefix() { @Test public void testStructQueryCustomPropertiesMatch() { String query = STRUCTURED_QUERY_PREFIX + "customProperties: node_type=model"; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -998,7 +1012,7 @@ public void testCustomPropertiesQuoted() { ); Map results = expectedResults.entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, entry -> search(searchService, entry.getKey()))); + .collect(Collectors.toMap(Map.Entry::getKey, entry -> searchAcrossEntities(searchService, entry.getKey()))); results.forEach((key, value) -> { Integer actualCount = value.getEntities().size(); @@ -1012,7 +1026,7 @@ public void testCustomPropertiesQuoted() { @Test public void testStructQueryFieldPaths() { String query = STRUCTURED_QUERY_PREFIX + "fieldPaths: customer_id"; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1025,7 +1039,7 @@ public void testStructQueryFieldPaths() { @Test public void testStructQueryBoolean() { String query = STRUCTURED_QUERY_PREFIX + "editedFieldTags:urn\\:li\\:tag\\:Legacy OR tags:urn\\:li\\:tag\\:testTag"; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1035,7 +1049,7 @@ public void testStructQueryBoolean() { assertEquals(result.getEntities().size(), 2); 
query = STRUCTURED_QUERY_PREFIX + "editedFieldTags:urn\\:li\\:tag\\:Legacy"; - result = search(searchService, query); + result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1045,7 +1059,7 @@ public void testStructQueryBoolean() { assertEquals(result.getEntities().size(), 1); query = STRUCTURED_QUERY_PREFIX + "tags:urn\\:li\\:tag\\:testTag"; - result = search(searchService, query); + result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1058,7 +1072,7 @@ public void testStructQueryBoolean() { @Test public void testStructQueryBrowsePaths() { String query = STRUCTURED_QUERY_PREFIX + "browsePaths:*/dbt/*"; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1071,7 +1085,7 @@ public void testStructQueryBrowsePaths() { @Test public void testOr() { String query = "stg_customers | logging_events"; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1079,7 +1093,7 @@ public void testOr() { assertEquals(result.getEntities().size(), 9); query = "stg_customers"; - result = search(searchService, query); + result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> 
e.getMatchedFields().isEmpty()), @@ -1087,7 +1101,7 @@ public void testOr() { assertEquals(result.getEntities().size(), 1); query = "logging_events"; - result = search(searchService, query); + result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1098,7 +1112,7 @@ public void testOr() { @Test public void testNegate() { String query = "logging_events -bckp"; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1106,7 +1120,7 @@ public void testNegate() { assertEquals(result.getEntities().size(), 7); query = "logging_events"; - result = search(searchService, query); + result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1117,7 +1131,7 @@ public void testNegate() { @Test public void testPrefix() { String query = "bigquery"; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1125,7 +1139,7 @@ public void testPrefix() { assertEquals(result.getEntities().size(), 8); query = "big*"; - result = search(searchService, query); + result = searchAcrossEntities(searchService, query); 
assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1136,7 +1150,7 @@ public void testPrefix() { @Test public void testParens() { String query = "dbt | (bigquery + covid19)"; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1144,7 +1158,7 @@ public void testParens() { assertEquals(result.getEntities().size(), 11); query = "dbt"; - result = search(searchService, query); + result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1152,7 +1166,7 @@ public void testParens() { assertEquals(result.getEntities().size(), 9); query = "bigquery + covid19"; - result = search(searchService, query); + result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1160,7 +1174,7 @@ public void testParens() { assertEquals(result.getEntities().size(), 2); query = "bigquery"; - result = search(searchService, query); + result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1168,7 +1182,7 @@ public void testParens() { 
assertEquals(result.getEntities().size(), 8); query = "covid19"; - result = search(searchService, query); + result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1179,7 +1193,7 @@ public void testParens() { @Test public void testPrefixVsExact() { String query = "\"customers\""; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1197,7 +1211,7 @@ public void testPrefixVsExact() { public void testPrefixVsExactCaseSensitivity() { List insensitiveExactMatches = List.of("testExactMatchCase", "testexactmatchcase", "TESTEXACTMATCHCASE"); for (String query : insensitiveExactMatches) { - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); @@ -1214,7 +1228,7 @@ public void testPrefixVsExactCaseSensitivity() { @Test public void testColumnExactMatch() { String query = "unit_data"; - SearchResult result = search(searchService, query); + SearchResult result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && !result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), @@ -1227,7 +1241,7 @@ public void testColumnExactMatch() { "Expected table name exact match first"); query = "special_column_only_present_here_info"; - result = search(searchService, query); + result = searchAcrossEntities(searchService, query); assertTrue(result.hasEntities() && 
!result.getEntities().isEmpty(), String.format("%s - Expected search results", query)); assertTrue(result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SearchLineageDataFixtureTests.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SearchLineageDataFixtureTests.java index f726eba547b99..55f7d4618f479 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SearchLineageDataFixtureTests.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SearchLineageDataFixtureTests.java @@ -2,6 +2,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.metadata.ESSearchLineageFixture; +import com.linkedin.metadata.ESTestUtils; import com.linkedin.metadata.search.LineageSearchResult; import com.linkedin.metadata.search.LineageSearchService; import com.linkedin.metadata.search.SearchResult; @@ -14,7 +15,6 @@ import java.net.URISyntaxException; -import static com.linkedin.metadata.ESTestUtils.search; import static com.linkedin.metadata.ESTestUtils.lineage; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; @@ -35,10 +35,10 @@ public class SearchLineageDataFixtureTests extends AbstractTestNGSpringContextTe @Test public void testFixtureInitialization() { assertNotNull(searchService); - SearchResult noResult = search(searchService, "no results"); + SearchResult noResult = ESTestUtils.searchAcrossEntities(searchService, "no results"); assertEquals(noResult.getEntities().size(), 0); - SearchResult result = search(searchService, "e3859789eed1cef55288b44f016ee08290d9fd08973e565c112d8"); + SearchResult result = ESTestUtils.searchAcrossEntities(searchService, "e3859789eed1cef55288b44f016ee08290d9fd08973e565c112d8"); assertEquals(result.getEntities().size(), 1); assertEquals(result.getEntities().get(0).getEntity().toString(), 
diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java index 780927d21417f..2be719ed263ea 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java @@ -141,7 +141,7 @@ public void invoke(@Nonnull MetadataChangeLog event) { private void handleEntityKeyEvent(DatasetUrn datasetUrn) { Filter entitiesWithYouAsSiblingFilter = createFilterForEntitiesWithYouAsSibling(datasetUrn); final SearchResult searchResult = _searchService.search( - "dataset", + List.of(DATASET_ENTITY_NAME), "*", entitiesWithYouAsSiblingFilter, null, diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java index 3eb21125bbaf2..5fb2cfaaef2d1 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java @@ -255,7 +255,7 @@ public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { Mockito.when( _mockSearchService.search( - anyString(), anyString(), any(), any(), anyInt(), anyInt(), eq(new SearchFlags().setFulltext(false) + any(), anyString(), any(), any(), anyInt(), anyInt(), eq(new SearchFlags().setFulltext(false) .setSkipAggregates(true).setSkipHighlighting(true)) )).thenReturn(returnSearchResult); diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreGlossaryIndices.java 
b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreGlossaryIndices.java index 486961c2c1f07..097dcfdfdf52e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreGlossaryIndices.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreGlossaryIndices.java @@ -74,7 +74,7 @@ public ExecutionMode getExecutionMode() { private int getAndRestoreTermAspectIndices(int start, AuditStamp auditStamp, AspectSpec termAspectSpec) throws Exception { SearchResult termsResult = - _entitySearchService.search(Constants.GLOSSARY_TERM_ENTITY_NAME, "", null, + _entitySearchService.search(List.of(Constants.GLOSSARY_TERM_ENTITY_NAME), "", null, null, start, BATCH_SIZE, new SearchFlags().setFulltext(false) .setSkipAggregates(true).setSkipHighlighting(true)); List termUrns = termsResult.getEntities().stream().map(SearchEntity::getEntity).collect(Collectors.toList()); @@ -116,7 +116,7 @@ null, start, BATCH_SIZE, new SearchFlags().setFulltext(false) } private int getAndRestoreNodeAspectIndices(int start, AuditStamp auditStamp, AspectSpec nodeAspectSpec) throws Exception { - SearchResult nodesResult = _entitySearchService.search(Constants.GLOSSARY_NODE_ENTITY_NAME, "", + SearchResult nodesResult = _entitySearchService.search(List.of(Constants.GLOSSARY_NODE_ENTITY_NAME), "", null, null, start, BATCH_SIZE, new SearchFlags().setFulltext(false) .setSkipAggregates(true).setSkipHighlighting(true)); List nodeUrns = nodesResult.getEntities().stream().map(SearchEntity::getEntity).collect(Collectors.toList()); diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/RestoreGlossaryIndicesTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/RestoreGlossaryIndicesTest.java index 88c63110ee63e..d56fbed07f890 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/RestoreGlossaryIndicesTest.java +++ 
b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/RestoreGlossaryIndicesTest.java @@ -21,6 +21,7 @@ import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.mxe.MetadataChangeProposal; +import java.util.List; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -40,7 +41,7 @@ private void mockGetTermInfo(Urn glossaryTermUrn, EntitySearchService mockSearch termInfoAspects.put(Constants.GLOSSARY_TERM_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(new GlossaryTermInfo().setName("test").data()))); Map termInfoResponses = new HashMap<>(); termInfoResponses.put(glossaryTermUrn, new EntityResponse().setUrn(glossaryTermUrn).setAspects(new EnvelopedAspectMap(termInfoAspects))); - Mockito.when(mockSearchService.search(Constants.GLOSSARY_TERM_ENTITY_NAME, "", null, null, 0, 1000, + Mockito.when(mockSearchService.search(List.of(Constants.GLOSSARY_TERM_ENTITY_NAME), "", null, null, 0, 1000, new SearchFlags().setFulltext(false).setSkipAggregates(true).setSkipHighlighting(true))) .thenReturn(new SearchResult().setNumEntities(1).setEntities(new SearchEntityArray(ImmutableList.of(new SearchEntity().setEntity(glossaryTermUrn))))); Mockito.when(mockService.getEntitiesV2( @@ -55,7 +56,7 @@ private void mockGetNodeInfo(Urn glossaryNodeUrn, EntitySearchService mockSearch nodeInfoAspects.put(Constants.GLOSSARY_NODE_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(new GlossaryNodeInfo().setName("test").data()))); Map nodeInfoResponses = new HashMap<>(); nodeInfoResponses.put(glossaryNodeUrn, new EntityResponse().setUrn(glossaryNodeUrn).setAspects(new EnvelopedAspectMap(nodeInfoAspects))); - Mockito.when(mockSearchService.search(Constants.GLOSSARY_NODE_ENTITY_NAME, "", null, null, 0, 1000, + Mockito.when(mockSearchService.search(List.of(Constants.GLOSSARY_NODE_ENTITY_NAME), "", null, null, 0, 1000, new 
SearchFlags().setFulltext(false).setSkipAggregates(true).setSkipHighlighting(true))) .thenReturn(new SearchResult().setNumEntities(1).setEntities(new SearchEntityArray(ImmutableList.of(new SearchEntity().setEntity(glossaryNodeUrn))))); Mockito.when(mockService.getEntitiesV2( @@ -221,10 +222,10 @@ public void testDoesNotRunWhenAlreadyExecuted() throws Exception { Mockito.verify(mockRegistry, Mockito.times(0)).getEntitySpec(Constants.GLOSSARY_TERM_ENTITY_NAME); Mockito.verify(mockRegistry, Mockito.times(0)).getEntitySpec(Constants.GLOSSARY_NODE_ENTITY_NAME); - Mockito.verify(mockSearchService, Mockito.times(0)).search(Constants.GLOSSARY_TERM_ENTITY_NAME, + Mockito.verify(mockSearchService, Mockito.times(0)).search(List.of(Constants.GLOSSARY_TERM_ENTITY_NAME), "", null, null, 0, 1000, new SearchFlags().setFulltext(false) .setSkipAggregates(true).setSkipHighlighting(true)); - Mockito.verify(mockSearchService, Mockito.times(0)).search(Constants.GLOSSARY_NODE_ENTITY_NAME, + Mockito.verify(mockSearchService, Mockito.times(0)).search(List.of(Constants.GLOSSARY_NODE_ENTITY_NAME), "", null, null, 0, 1000, new SearchFlags().setFulltext(false) .setSkipAggregates(true).setSkipHighlighting(true)); Mockito.verify(mockService, Mockito.times(0)).ingestProposal( diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index b8fd785eaad0f..f6dedfb9a07c6 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -332,7 +332,7 @@ public Task search(@ActionParam(PARAM_ENTITY) @Nonnull String enti () -> { final SearchResult result; // This API is not used by the frontend for search bars so we default to structured - result = 
_entitySearchService.search(entityName, input, filter, sortCriterion, start, count, searchFlags); + result = _entitySearchService.search(List.of(entityName), input, filter, sortCriterion, start, count, searchFlags); return validateSearchResult(result, _entityService); }, MetricRegistry.name(this.getClass(), "search")); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java index de5bdb62f201b..a46b58aabfb0b 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java @@ -64,7 +64,7 @@ public interface EntitySearchService { * Safe for non-structured, user input, queries with an attempt to provide some advanced features * Impl * - * @param entityName name of the entity + * @param entityNames names of the entities * @param input the search input text * @param postFilters the request map with fields and values as filters to be applied to search hits * @param sortCriterion {@link SortCriterion} to be applied to search results @@ -74,7 +74,7 @@ public interface EntitySearchService { * @return a {@link SearchResult} that contains a list of matched documents and related search result metadata */ @Nonnull - SearchResult search(@Nonnull String entityName, @Nonnull String input, @Nullable Filter postFilters, + SearchResult search(@Nonnull List entityNames, @Nonnull String input, @Nullable Filter postFilters, @Nullable SortCriterion sortCriterion, int from, int size, @Nullable SearchFlags searchFlags); /** @@ -84,7 +84,7 @@ SearchResult search(@Nonnull String entityName, @Nonnull String input, @Nullable * Safe for non-structured, user input, queries with an attempt to provide some advanced features * Impl * - * @param entityName name of the entity + * @param entityNames names of the 
entities * @param input the search input text * @param postFilters the request map with fields and values as filters to be applied to search hits * @param sortCriterion {@link SortCriterion} to be applied to search results @@ -95,7 +95,7 @@ SearchResult search(@Nonnull String entityName, @Nonnull String input, @Nullable * @return a {@link SearchResult} that contains a list of matched documents and related search result metadata */ @Nonnull - SearchResult search(@Nonnull String entityName, @Nonnull String input, @Nullable Filter postFilters, + SearchResult search(@Nonnull List entityNames, @Nonnull String input, @Nullable Filter postFilters, @Nullable SortCriterion sortCriterion, int from, int size, @Nullable SearchFlags searchFlags, @Nullable List facets); /** diff --git a/metadata-service/war/src/main/resources/boot/data_platforms.json b/metadata-service/war/src/main/resources/boot/data_platforms.json index fc285353f5005..2abe81d93236c 100644 --- a/metadata-service/war/src/main/resources/boot/data_platforms.json +++ b/metadata-service/war/src/main/resources/boot/data_platforms.json @@ -427,7 +427,7 @@ } }, { - "urn": "urn:li:dataPlatform:presto_on_hive", + "urn": "urn:li:dataPlatform:presto-on-hive", "aspect": { "datasetNameDelimiter": ".", "name": "presto-on-hive", diff --git a/mock-entity-registry/src/main/java/mock/MockEntitySpec.java b/mock-entity-registry/src/main/java/mock/MockEntitySpec.java index 097e0845504a1..f43c1f7fd6613 100644 --- a/mock-entity-registry/src/main/java/mock/MockEntitySpec.java +++ b/mock-entity-registry/src/main/java/mock/MockEntitySpec.java @@ -25,6 +25,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import static com.linkedin.metadata.Constants.*; @@ -88,7 +89,8 @@ public AspectSpec createAspectSpec(T type, String nam @Override public List getAspectSpecs() { - return Collections.emptyList(); + return ASPECT_TYPE_MAP.keySet().stream().map(name -> 
createAspectSpec(ASPECT_TYPE_MAP.get(name), name)).collect( + Collectors.toList()); } @Override