From 94d424b7d4deed9fb746afe86247ceb5610620c7 Mon Sep 17 00:00:00 2001 From: Davi Arnaut Date: Thu, 9 May 2024 10:22:33 -0500 Subject: [PATCH 01/15] fix(docs): adjust new requirements for DynamoDB ingestion (#10470) --- docs/how/updating-datahub.md | 2 +- metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index f5b3600d98306..fb769650bce0d 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -20,7 +20,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes -- #10419 - `aws_session_token` and `aws_region` are now required configurations in the DynamoDB connector. The connector will no longer loop through all AWS regions; instead, it will only use the region passed into the recipe configuration. +- #10419 - `aws_region` is now a required configuration in the DynamoDB connector. The connector will no longer loop through all AWS regions; instead, it will only use the region passed into the recipe configuration. ### Potential Downtime diff --git a/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md b/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md index d6815774b7df0..0e85ec7a8cc61 100644 --- a/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md +++ b/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md @@ -1,6 +1,6 @@ ### Prerequisities -Notice of breaking change: in the latest version of the DynamoDB connector, both `aws_session_token` and `aws_region` are required configurations. The connector will no longer loop through all AWS regions; instead, it will only use the region passed into the recipe configuration. +Notice of breaking change: in the latest version of the DynamoDB connector, `aws_region` is now a required configuration. 
The connector will no longer loop through all AWS regions; instead, it will only use the region passed into the recipe configuration. In order to execute this source, you need to attach the `AmazonDynamoDBReadOnlyAccess` policy to a user in your AWS account. Then create an API access key and secret for the user. From 96c605df711afd049cbac8ef07692f6d9ff7ac93 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 9 May 2024 09:18:26 -0700 Subject: [PATCH 02/15] feat(ingest/redshift): add timers for lineage v2 (#10460) --- .../ingestion/source/redshift/lineage_v2.py | 15 ++++++++++----- .../ingestion/source/redshift/report.py | 2 ++ .../sql_parsing/sql_parsing_aggregator.py | 19 +++++++++++++------ 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py index 45fd1477df44e..797b309f528cc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py @@ -34,6 +34,7 @@ KnownQueryLineageInfo, SqlParsingAggregator, ) +from datahub.utilities.perf_timer import PerfTimer logger = logging.getLogger(__name__) @@ -226,13 +227,17 @@ def _populate_lineage_agg( try: logger.debug(f"Processing {lineage_type.name} lineage query: {query}") - for lineage_row in RedshiftDataDictionary.get_lineage_rows( - conn=connection, query=query - ): - processor(lineage_row) + timer = self.report.lineage_phases_timer.setdefault( + lineage_type.name, PerfTimer() + ) + with timer: + for lineage_row in RedshiftDataDictionary.get_lineage_rows( + conn=connection, query=query + ): + processor(lineage_row) except Exception as e: self.report.warning( - f"extract-{lineage_type.name}", + f"lineage-v2-extract-{lineage_type.name}", f"Error was {e}, {traceback.format_exc()}", ) self._lineage_v1.report_status(f"extract-{lineage_type.name}", False) diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py index e2a035091d0ad..2e6cb8051c91e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py @@ -8,6 +8,7 @@ from datahub.ingestion.source_report.time_window import BaseTimeWindowReport from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport from datahub.utilities.lossy_collections import LossyDict +from datahub.utilities.perf_timer import PerfTimer from datahub.utilities.stats_collections import TopKDict @@ -55,6 +56,7 @@ class RedshiftReport( # lineage/usage v2 sql_aggregator: Optional[SqlAggregatorReport] = None + lineage_phases_timer: Dict[str, PerfTimer] = field(default_factory=dict) def report_dropped(self, key: str) -> None: self.filtered.append(key) diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index f06ca650bab9e..530764e8320cd 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -51,6 +51,7 @@ ) from datahub.utilities.lossy_collections import LossyDict, LossyList from datahub.utilities.ordered_set import OrderedSet +from datahub.utilities.perf_timer import PerfTimer logger = logging.getLogger(__name__) QueryId = str @@ -156,6 +157,10 @@ class SqlAggregatorReport(Report): default_factory=LossyDict ) + # SQL parsing (over all invocations). + num_sql_parsed: int = 0 + sql_parsing_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer) + # Other lineage loading metrics. 
num_known_query_lineage: int = 0 num_known_mapping_lineage: int = 0 @@ -749,12 +754,14 @@ def _run_sql_parser( timestamp: Optional[datetime] = None, user: Optional[CorpUserUrn] = None, ) -> SqlParsingResult: - parsed = sqlglot_lineage( - query, - schema_resolver=schema_resolver, - default_db=default_db, - default_schema=default_schema, - ) + with self.report.sql_parsing_timer: + parsed = sqlglot_lineage( + query, + schema_resolver=schema_resolver, + default_db=default_db, + default_schema=default_schema, + ) + self.report.num_sql_parsed += 1 # Conditionally log the query. if self.query_log == QueryLogSetting.STORE_ALL or ( From c8bb7dd34af0e3b8efa5a6fd9a0664c20a95bcb7 Mon Sep 17 00:00:00 2001 From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com> Date: Thu, 9 May 2024 10:21:51 -0700 Subject: [PATCH 03/15] feat(fabricType): add fabric type RVW (#10472) --- datahub-graphql-core/src/main/resources/entity.graphql | 5 +++++ docs/how/updating-datahub.md | 1 + li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl | 5 +++++ .../main/snapshot/com.linkedin.entity.aspects.snapshot.json | 5 +++-- .../main/snapshot/com.linkedin.entity.entities.snapshot.json | 5 +++-- .../src/main/snapshot/com.linkedin.entity.runs.snapshot.json | 5 +++-- .../com.linkedin.operations.operations.snapshot.json | 5 +++-- .../snapshot/com.linkedin.platform.platform.snapshot.json | 5 +++-- 8 files changed, 26 insertions(+), 10 deletions(-) diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 296d62bc534a3..1f2642567b49e 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -2685,6 +2685,11 @@ enum FabricType { Designates corporation fabrics """ CORP + + """ + Designates review fabrics + """ + RVW } """ diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index fb769650bce0d..998caf2565dcd 100644 --- 
a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -21,6 +21,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes - #10419 - `aws_region` is now a required configuration in the DynamoDB connector. The connector will no longer loop through all AWS regions; instead, it will only use the region passed into the recipe configuration. +- #10472 - `RVW` added as a FabricType. No rollbacks allowed once metadata with this fabric type is added without manual cleanups in databases. ### Potential Downtime diff --git a/li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl b/li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl index c5959ac4cc8fa..366843e460cb3 100644 --- a/li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl +++ b/li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl @@ -54,4 +54,9 @@ enum FabricType { * Designates corporation fabrics */ CORP + + /** + * Designates review fabrics + */ + RVW } diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index becdcdd0215fd..00b434d30356f 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -834,7 +834,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -845,7 +845,8 @@ "QA" : "Designates quality assurance fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", - "UAT" : 
"Designates user acceptance testing fabrics" + "UAT" : "Designates user acceptance testing fabrics", + "RVW" : "Designates review fabrics" } }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 96b9b570b2bf1..ffbcdd1b2adb3 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -834,7 +834,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -845,7 +845,8 @@ "QA" : "Designates quality assurance fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", - "UAT" : "Designates user acceptance testing fabrics" + "UAT" : "Designates user acceptance testing fabrics", + "RVW" : "Designates review fabrics" } }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index 6100073f1fbc9..0139072b2ae15 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -576,7 +576,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", 
"NON_PROD", "PROD", "CORP", "RVW" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -587,7 +587,8 @@ "QA" : "Designates quality assurance fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", - "UAT" : "Designates user acceptance testing fabrics" + "UAT" : "Designates user acceptance testing fabrics", + "RVW" : "Designates review fabrics" } }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index 0573a342da420..1caeed2570317 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -576,7 +576,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -587,7 +587,8 @@ "QA" : "Designates quality assurance fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", - "UAT" : "Designates user acceptance testing fabrics" + "UAT" : "Designates user acceptance testing fabrics", + "RVW" : "Designates review fabrics" } }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index 4a1f24d527b89..1592333988b4c 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ 
b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -834,7 +834,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -845,7 +845,8 @@ "QA" : "Designates quality assurance fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", - "UAT" : "Designates user acceptance testing fabrics" + "UAT" : "Designates user acceptance testing fabrics", + "RVW" : "Designates review fabrics" } }, { "type" : "record", From e7c7015981e4258f9506ebe2026503e7605cb580 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 9 May 2024 13:55:25 -0500 Subject: [PATCH 04/15] feat(structured-properties): immutable flag (#10461) Co-authored-by: Chris Collins --- .../StructuredPropertyMapper.java | 1 + .../src/main/resources/properties.graphql | 5 + .../tabs/Properties/Edit/EditColumn.tsx | 2 +- .../src/graphql/fragments.graphql | 1 + .../StructuredPropertiesValidator.java | 139 +++++++++-- .../StructuredPropertiesValidatorTest.java | 220 ++++++++++++++++++ .../test/metadata/aspect/batch/TestMCP.java | 2 +- .../structuredproperties.py | 2 + .../StructuredPropertyDefinition.pdl | 8 + .../src/main/resources/entity-registry.yml | 1 + .../authorization/PoliciesConfig.java | 16 +- 11 files changed, 377 insertions(+), 20 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/structuredproperty/StructuredPropertyMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/structuredproperty/StructuredPropertyMapper.java index b3abab5ed3d36..ff54131506a7c 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/structuredproperty/StructuredPropertyMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/structuredproperty/StructuredPropertyMapper.java @@ -60,6 +60,7 @@ private void mapStructuredPropertyDefinition( definition.setQualifiedName(gmsDefinition.getQualifiedName()); definition.setCardinality( PropertyCardinality.valueOf(gmsDefinition.getCardinality().toString())); + definition.setImmutable(gmsDefinition.isImmutable()); definition.setValueType(createDataTypeEntity(gmsDefinition.getValueType())); if (gmsDefinition.hasDisplayName()) { definition.setDisplayName(gmsDefinition.getDisplayName()); diff --git a/datahub-graphql-core/src/main/resources/properties.graphql b/datahub-graphql-core/src/main/resources/properties.graphql index 3bf0bbefc406d..120154e930d59 100644 --- a/datahub-graphql-core/src/main/resources/properties.graphql +++ b/datahub-graphql-core/src/main/resources/properties.graphql @@ -75,6 +75,11 @@ type StructuredPropertyDefinition { Entity types that this structured property can be applied to """ entityTypes: [EntityTypeEntity!]! + + """ + Whether or not this structured property is immutable + """ + immutable: Boolean! 
} """ diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditColumn.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditColumn.tsx index 7ff08e3813863..ac50df6a5381e 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditColumn.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditColumn.tsx @@ -10,7 +10,7 @@ interface Props { export function EditColumn({ propertyRow }: Props) { const [isEditModalVisible, setIsEditModalVisible] = useState(false); - if (!propertyRow.structuredProperty) { + if (!propertyRow.structuredProperty || propertyRow.structuredProperty?.definition.immutable) { return null; } diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index 7028ac8c4f4d0..b28150a47b753 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -1245,6 +1245,7 @@ fragment structuredPropertyFields on StructuredPropertyEntity { qualifiedName description cardinality + immutable valueType { info { type diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java index 31c0a1a9093f7..f0971ca35a88e 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java @@ -27,6 +27,7 @@ import com.linkedin.structured.StructuredProperties; import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.structured.StructuredPropertyValueAssignment; +import com.linkedin.util.Pair; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; @@ -38,9 +39,11 @@ import java.util.Map; import java.util.Optional; import 
java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; /** A Validator for StructuredProperties Aspect that is attached to entities like Datasets, etc. */ @@ -92,20 +95,22 @@ protected Stream validateProposedAspects( @Override protected Stream validatePreCommitAspects( @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { - return Stream.empty(); + return validateImmutable( + changeMCPs.stream() + .filter( + i -> + ChangeType.DELETE.equals(i.getChangeType()) + || CHANGE_TYPES.contains(i.getChangeType())) + .collect(Collectors.toList()), + retrieverContext.getAspectRetriever()); } public static Stream validateProposedUpserts( @Nonnull Collection mcpItems, @Nonnull AspectRetriever aspectRetriever) { ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); - - // Validate propertyUrns - Set validPropertyUrns = validateStructuredPropertyUrns(mcpItems, exceptions); - - // Fetch property aspects for further validation Map> allStructuredPropertiesAspects = - fetchPropertyAspects(validPropertyUrns, aspectRetriever); + fetchPropertyAspects(mcpItems, aspectRetriever, exceptions, false); // Validate assignments for (BatchItem i : exceptions.successful(mcpItems)) { @@ -120,15 +125,13 @@ public static Stream validateProposedUpserts( softDeleteCheck(i, propertyAspects, "Cannot apply a soft deleted Structured Property value") .ifPresent(exceptions::addException); - Aspect structuredPropertyDefinitionAspect = - propertyAspects.get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME); - if (structuredPropertyDefinitionAspect == null) { + StructuredPropertyDefinition structuredPropertyDefinition = + lookupPropertyDefinition(propertyUrn, allStructuredPropertiesAspects); + if (structuredPropertyDefinition == null) { exceptions.addException(i, "Unexpected null value 
found."); } - StructuredPropertyDefinition structuredPropertyDefinition = - new StructuredPropertyDefinition(structuredPropertyDefinitionAspect.data()); - log.warn( + log.debug( "Retrieved property definition for {}. {}", propertyUrn, structuredPropertyDefinition); if (structuredPropertyDefinition != null) { PrimitivePropertyValueArray values = structuredPropertyValueAssignment.getValues(); @@ -158,8 +161,73 @@ public static Stream validateProposedUpserts( return exceptions.streamAllExceptions(); } + public static Stream validateImmutable( + @Nonnull Collection changeMCPs, @Nonnull AspectRetriever aspectRetriever) { + + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + final Map> allStructuredPropertiesAspects = + fetchPropertyAspects(changeMCPs, aspectRetriever, exceptions, true); + + Set immutablePropertyUrns = + allStructuredPropertiesAspects.keySet().stream() + .map( + stringAspectMap -> + Pair.of( + stringAspectMap, + lookupPropertyDefinition(stringAspectMap, allStructuredPropertiesAspects))) + .filter(defPair -> defPair.getSecond() != null && defPair.getSecond().isImmutable()) + .map(Pair::getFirst) + .collect(Collectors.toSet()); + + // Validate immutable assignments + for (ChangeMCP i : exceptions.successful(changeMCPs)) { + + // only apply immutable validation if previous properties exist + if (i.getPreviousRecordTemplate() != null) { + Map newImmutablePropertyMap = + i.getAspect(StructuredProperties.class).getProperties().stream() + .filter(assign -> immutablePropertyUrns.contains(assign.getPropertyUrn())) + .collect( + Collectors.toMap( + StructuredPropertyValueAssignment::getPropertyUrn, Function.identity())); + Map oldImmutablePropertyMap = + i.getPreviousAspect(StructuredProperties.class).getProperties().stream() + .filter(assign -> immutablePropertyUrns.contains(assign.getPropertyUrn())) + .collect( + Collectors.toMap( + StructuredPropertyValueAssignment::getPropertyUrn, Function.identity())); + + // 
upsert/mutation path + newImmutablePropertyMap + .entrySet() + .forEach( + entry -> { + Urn propertyUrn = entry.getKey(); + StructuredPropertyValueAssignment assignment = entry.getValue(); + + if (oldImmutablePropertyMap.containsKey(propertyUrn) + && !oldImmutablePropertyMap.get(propertyUrn).equals(assignment)) { + exceptions.addException( + i, String.format("Cannot mutate an immutable property: %s", propertyUrn)); + } + }); + + // delete path + oldImmutablePropertyMap.entrySet().stream() + .filter(entry -> !newImmutablePropertyMap.containsKey(entry.getKey())) + .forEach( + entry -> + exceptions.addException( + i, + String.format("Cannot delete an immutable property %s", entry.getKey()))); + } + } + + return exceptions.streamAllExceptions(); + } + private static Set validateStructuredPropertyUrns( - Collection mcpItems, ValidationExceptionCollection exceptions) { + Collection mcpItems, ValidationExceptionCollection exceptions) { Set validPropertyUrns = new HashSet<>(); for (BatchItem i : exceptions.successful(mcpItems)) { @@ -202,6 +270,17 @@ private static Set validateStructuredPropertyUrns( return validPropertyUrns; } + private static Set previousStructuredPropertyUrns(Collection mcpItems) { + return mcpItems.stream() + .filter(i -> i instanceof ChangeMCP) + .map(i -> ((ChangeMCP) i)) + .filter(i -> i.getPreviousRecordTemplate() != null) + .flatMap(i -> i.getPreviousAspect(StructuredProperties.class).getProperties().stream()) + .map(StructuredPropertyValueAssignment::getPropertyUrn) + .filter(propertyUrn -> propertyUrn.getEntityType().equals("structuredProperty")) + .collect(Collectors.toSet()); + } + private static Optional validateAllowedValues( BatchItem item, Urn propertyUrn, @@ -338,14 +417,40 @@ private static String getValueTypeId(@Nonnull final Urn valueType) { } private static Map> fetchPropertyAspects( - Set structuredPropertyUrns, AspectRetriever aspectRetriever) { - if (structuredPropertyUrns.isEmpty()) { + @Nonnull Collection mcpItems, + 
AspectRetriever aspectRetriever, + @Nonnull ValidationExceptionCollection exceptions, + boolean includePrevious) { + + // Validate propertyUrns + Set validPropertyUrns = + Stream.concat( + validateStructuredPropertyUrns(mcpItems, exceptions).stream(), + includePrevious + ? previousStructuredPropertyUrns(mcpItems).stream() + : Stream.empty()) + .collect(Collectors.toSet()); + + if (validPropertyUrns.isEmpty()) { return Collections.emptyMap(); } else { return aspectRetriever.getLatestAspectObjects( - structuredPropertyUrns, + validPropertyUrns, ImmutableSet.of( Constants.STATUS_ASPECT_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)); } } + + @Nullable + private static StructuredPropertyDefinition lookupPropertyDefinition( + @Nonnull Urn propertyUrn, + @Nonnull Map> allStructuredPropertiesAspects) { + Map propertyAspects = + allStructuredPropertiesAspects.getOrDefault(propertyUrn, Collections.emptyMap()); + Aspect structuredPropertyDefinitionAspect = + propertyAspects.get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME); + return structuredPropertyDefinitionAspect == null + ? 
null + : new StructuredPropertyDefinition(structuredPropertyDefinitionAspect.data()); + } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java index 5d63d8c8ba5e7..77cf453f517be 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java @@ -4,6 +4,9 @@ import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; import com.linkedin.metadata.aspect.validation.StructuredPropertiesValidator; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.structured.PrimitivePropertyValue; @@ -19,6 +22,9 @@ import com.linkedin.test.metadata.aspect.batch.TestMCP; import java.net.URISyntaxException; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.testng.Assert; import org.testng.annotations.Test; @@ -26,6 +32,9 @@ public class StructuredPropertiesValidatorTest { private static final EntityRegistry TEST_REGISTRY = new TestEntityRegistry(); + private static final Urn TEST_DATASET_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:datahub,Test,PROD)"); + @Test public void testValidateAspectNumberUpsert() throws URISyntaxException { Urn propertyUrn = @@ -268,4 +277,215 @@ propertyUrn, numberPropertyDef, new Status().setRemoved(true))) 1, "Should have raised exception for soft deleted definition"); } + + @Test + public void testValidateImmutableMutation() throws URISyntaxException { + Urn mutablePropertyUrn = + 
Urn.createFromString("urn:li:structuredProperty:io.acryl.mutableProperty"); + StructuredPropertyDefinition mutablePropertyDef = + new StructuredPropertyDefinition() + .setImmutable(false) + .setValueType(Urn.createFromString("urn:li:type:datahub.number")) + .setAllowedValues( + new PropertyValueArray( + List.of( + new PropertyValue().setValue(PrimitivePropertyValue.create(30.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(60.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(90.0))))); + StructuredPropertyValueAssignment mutableAssignment = + new StructuredPropertyValueAssignment() + .setPropertyUrn(mutablePropertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(30.0))); + StructuredProperties mutablePayload = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(mutableAssignment)); + + Urn immutablePropertyUrn = + Urn.createFromString("urn:li:structuredProperty:io.acryl.immutableProperty"); + StructuredPropertyDefinition immutablePropertyDef = + new StructuredPropertyDefinition() + .setImmutable(true) + .setValueType(Urn.createFromString("urn:li:type:datahub.number")) + .setAllowedValues( + new PropertyValueArray( + List.of( + new PropertyValue().setValue(PrimitivePropertyValue.create(30.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(60.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(90.0))))); + StructuredPropertyValueAssignment immutableAssignment = + new StructuredPropertyValueAssignment() + .setPropertyUrn(immutablePropertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(30.0))); + StructuredProperties immutablePayload = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(immutableAssignment)); + + // No previous values for either + boolean noPreviousValid = + StructuredPropertiesValidator.validateImmutable( + Stream.concat( + 
TestMCP.ofOneMCP(TEST_DATASET_URN, null, mutablePayload, TEST_REGISTRY) + .stream(), + TestMCP.ofOneMCP( + TEST_DATASET_URN, null, immutablePayload, TEST_REGISTRY) + .stream()) + .collect(Collectors.toSet()), + new MockAspectRetriever( + Map.of( + mutablePropertyUrn, + List.of(mutablePropertyDef), + immutablePropertyUrn, + List.of(immutablePropertyDef)))) + .count() + == 0; + Assert.assertTrue(noPreviousValid); + + // Unchanged values of previous (no issues with immutability) + boolean noChangeValid = + StructuredPropertiesValidator.validateImmutable( + Stream.concat( + TestMCP.ofOneMCP( + TEST_DATASET_URN, mutablePayload, mutablePayload, TEST_REGISTRY) + .stream(), + TestMCP.ofOneMCP( + TEST_DATASET_URN, immutablePayload, immutablePayload, TEST_REGISTRY) + .stream()) + .collect(Collectors.toSet()), + new MockAspectRetriever( + Map.of( + mutablePropertyUrn, + List.of(mutablePropertyDef), + immutablePropertyUrn, + List.of(immutablePropertyDef)))) + .count() + == 0; + Assert.assertTrue(noChangeValid); + + // invalid + StructuredPropertyValueAssignment immutableAssignment2 = + new StructuredPropertyValueAssignment() + .setPropertyUrn(immutablePropertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(60.0))); + StructuredProperties immutablePayload2 = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(immutableAssignment2)); + + List exceptions = + StructuredPropertiesValidator.validateImmutable( + Stream.concat( + TestMCP.ofOneMCP( + TEST_DATASET_URN, mutablePayload, mutablePayload, TEST_REGISTRY) + .stream(), + TestMCP.ofOneMCP( + TEST_DATASET_URN, immutablePayload, immutablePayload2, TEST_REGISTRY) + .stream()) + .collect(Collectors.toSet()), + new MockAspectRetriever( + Map.of( + mutablePropertyUrn, + List.of(mutablePropertyDef), + immutablePropertyUrn, + List.of(immutablePropertyDef)))) + .collect(Collectors.toList()); + + Assert.assertEquals(exceptions.size(), 1, "Expected rejected mutation of 
immutable property."); + Assert.assertEquals(exceptions.get(0).getExceptionKey().getKey(), TEST_DATASET_URN); + Assert.assertTrue( + exceptions.get(0).getMessage().contains("Cannot mutate an immutable property")); + } + + @Test + public void testValidateImmutableDelete() throws URISyntaxException { + final StructuredProperties emptyProperties = + new StructuredProperties().setProperties(new StructuredPropertyValueAssignmentArray()); + + Urn mutablePropertyUrn = + Urn.createFromString("urn:li:structuredProperty:io.acryl.mutableProperty"); + StructuredPropertyDefinition mutablePropertyDef = + new StructuredPropertyDefinition() + .setImmutable(false) + .setValueType(Urn.createFromString("urn:li:type:datahub.number")) + .setAllowedValues( + new PropertyValueArray( + List.of( + new PropertyValue().setValue(PrimitivePropertyValue.create(30.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(60.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(90.0))))); + StructuredPropertyValueAssignment mutableAssignment = + new StructuredPropertyValueAssignment() + .setPropertyUrn(mutablePropertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(30.0))); + StructuredProperties mutablePayload = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(mutableAssignment)); + + Urn immutablePropertyUrn = + Urn.createFromString("urn:li:structuredProperty:io.acryl.immutableProperty"); + StructuredPropertyDefinition immutablePropertyDef = + new StructuredPropertyDefinition() + .setImmutable(true) + .setValueType(Urn.createFromString("urn:li:type:datahub.number")) + .setAllowedValues( + new PropertyValueArray( + List.of( + new PropertyValue().setValue(PrimitivePropertyValue.create(30.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(60.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(90.0))))); + StructuredPropertyValueAssignment immutableAssignment = + new 
StructuredPropertyValueAssignment() + .setPropertyUrn(immutablePropertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(30.0))); + StructuredProperties immutablePayload = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(immutableAssignment)); + + // Delete mutable, Delete with no-op for immutable allowed + boolean noPreviousValid = + StructuredPropertiesValidator.validateImmutable( + Stream.concat( + TestMCP.ofOneMCP( + TEST_DATASET_URN, mutablePayload, emptyProperties, TEST_REGISTRY) + .stream(), + TestMCP.ofOneMCP( + TEST_DATASET_URN, immutablePayload, immutablePayload, TEST_REGISTRY) + .stream()) + // set to DELETE + .map(i -> ((TestMCP) i).toBuilder().changeType(ChangeType.DELETE).build()) + .collect(Collectors.toSet()), + new MockAspectRetriever( + Map.of( + mutablePropertyUrn, + List.of(mutablePropertyDef), + immutablePropertyUrn, + List.of(immutablePropertyDef)))) + .count() + == 0; + Assert.assertTrue(noPreviousValid); + + // invalid (delete of mutable allowed, delete of immutable denied) + List exceptions = + StructuredPropertiesValidator.validateImmutable( + Stream.concat( + TestMCP.ofOneMCP( + TEST_DATASET_URN, mutablePayload, emptyProperties, TEST_REGISTRY) + .stream(), + TestMCP.ofOneMCP( + TEST_DATASET_URN, immutablePayload, emptyProperties, TEST_REGISTRY) + .stream()) + // set to DELETE + .map(i -> ((TestMCP) i).toBuilder().changeType(ChangeType.DELETE).build()) + .collect(Collectors.toSet()), + new MockAspectRetriever( + Map.of( + mutablePropertyUrn, + List.of(mutablePropertyDef), + immutablePropertyUrn, + List.of(immutablePropertyDef)))) + .collect(Collectors.toList()); + + Assert.assertEquals(exceptions.size(), 1, "Expected rejected delete of immutable property."); + Assert.assertEquals(exceptions.get(0).getExceptionKey().getKey(), TEST_DATASET_URN); + Assert.assertTrue( + exceptions.get(0).getMessage().contains("Cannot delete an immutable property")); + } } diff --git 
a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java b/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java index 20d01dc55934a..1e1efe4238187 100644 --- a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java +++ b/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java @@ -27,7 +27,7 @@ import lombok.Getter; import lombok.Setter; -@Builder +@Builder(toBuilder = true) @Getter public class TestMCP implements ChangeMCP { private static final String TEST_DATASET_URN = diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py index ab40db5253fd1..eacbff4b31d93 100644 --- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py +++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py @@ -75,6 +75,7 @@ class StructuredProperties(ConfigModel): cardinality: Optional[str] = None allowed_values: Optional[List[AllowedValue]] = None type_qualifier: Optional[TypeQualifierAllowedTypes] = None + immutable: Optional[bool] = False @property def fqn(self) -> str: @@ -124,6 +125,7 @@ def create(file: str) -> None: for entity_type in structuredproperty.entity_types or [] ], cardinality=structuredproperty.cardinality, + immutable=structuredproperty.immutable, allowedValues=[ PropertyValueClass( value=v.value, description=v.description diff --git a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl index 1b263b679531a..178d7b3cf4376 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl @@ -70,5 
+70,13 @@ record StructuredPropertyDefinition { * from the logical type. */ searchConfiguration: optional DataHubSearchConfig + + /** + * Whether the structured property value is immutable once applied to an entity. + */ + @Searchable = { + "fieldType": "BOOLEAN" + } + immutable: boolean = false } diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index d7ab1f948b411..a9301076d4e82 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -588,6 +588,7 @@ plugins: supportedOperations: - CREATE - UPSERT + - DELETE supportedEntityAspectNames: - entityName: '*' aspectName: structuredProperties diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index 6788f6e87fc0d..342c492b01b2e 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -653,6 +653,19 @@ public class PoliciesConfig { CREATE_ENTITY_PRIVILEGE, EXISTS_ENTITY_PRIVILEGE)); + // Properties Privileges + public static final ResourcePrivileges STRUCTURED_PROPERTIES_PRIVILEGES = + ResourcePrivileges.of( + "structuredProperty", + "Structured Properties", + "Structured Properties", + ImmutableList.of( + CREATE_ENTITY_PRIVILEGE, + VIEW_ENTITY_PAGE_PRIVILEGE, + EXISTS_ENTITY_PRIVILEGE, + EDIT_ENTITY_PRIVILEGE, + DELETE_ENTITY_PRIVILEGE)); + // ERModelRelationship Privileges public static final ResourcePrivileges ER_MODEL_RELATIONSHIP_PRIVILEGES = ResourcePrivileges.of( @@ -689,7 +702,8 @@ public class PoliciesConfig { NOTEBOOK_PRIVILEGES, DATA_PRODUCT_PRIVILEGES, ER_MODEL_RELATIONSHIP_PRIVILEGES, - BUSINESS_ATTRIBUTE_PRIVILEGES); + BUSINESS_ATTRIBUTE_PRIVILEGES, + STRUCTURED_PROPERTIES_PRIVILEGES); // Merge 
all entity specific resource privileges to create a superset of all resource privileges public static final ResourcePrivileges ALL_RESOURCE_PRIVILEGES = From 162b6e94a67f9adb431c1e1aec07e4aa16bd4f31 Mon Sep 17 00:00:00 2001 From: Davi Arnaut Date: Thu, 9 May 2024 14:40:44 -0500 Subject: [PATCH 05/15] fix(docker): mount newly added jetty-jmx.xml (#10475) --- docker/docker-compose.dev.yml | 1 + docker/profiles/docker-compose.gms.yml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index b6ac43a9eda43..7974b66ec87db 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -51,6 +51,7 @@ services: volumes: - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml + - ./datahub-gms/jetty-jmx.xml:/datahub/datahub-gms/scripts/jetty-jmx.xml - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-gms/scripts/prometheus-config.yaml - ../metadata-models/src/main/resources/:/datahub/datahub-gms/resources - ../metadata-service/war/build/libs/:/datahub/datahub-gms/bin diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 208ed763dd6a3..76bdcacd2dfc9 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -130,6 +130,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev volumes: - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml + - ./datahub-gms/jetty-jmx.xml:/datahub/datahub-gms/scripts/jetty-jmx.xml - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-gms/scripts/prometheus-config.yaml - ../../metadata-models/src/main/resources/:/datahub/datahub-gms/resources - ../../metadata-service/war/build/libs/:/datahub/datahub-gms/bin @@ -423,4 +424,4 @@ services: - debug-consumers depends_on: datahub-gms-debug-consumers: - 
condition: service_healthy \ No newline at end of file + condition: service_healthy From 6ed21bd1bc70a3ceb7dddb43ea7db4ca56874547 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 9 May 2024 14:56:03 -0500 Subject: [PATCH 06/15] feat(plugins): spring custom plugins (#10389) Co-authored-by: Kevin Chun Co-authored-by: Kevin Chun --- build.gradle | 2 +- .../graphql/types/mappers/MapperUtils.java | 4 +- .../types/mappers/MapperUtilsTest.java | 4 +- docs/how/updating-datahub.md | 1 + entity-registry/build.gradle | 2 +- .../hooks/StructuredPropertiesSoftDelete.java | 10 +- .../aspect/plugins/PluginFactory.java | 224 ++++++++++++------ .../metadata/aspect/plugins/PluginSpec.java | 9 +- .../plugins/config/AspectPluginConfig.java | 35 ++- .../plugins/config/PluginConfiguration.java | 60 +++++ .../aspect/plugins/hooks/MCLSideEffect.java | 5 - .../aspect/plugins/hooks/MCPSideEffect.java | 5 - .../aspect/plugins/hooks/MutationHook.java | 5 - .../validation/AspectPayloadValidator.java | 5 - .../CreateIfNotExistsValidator.java | 10 +- .../PropertyDefinitionValidator.java | 11 +- .../StructuredPropertiesValidator.java | 12 +- .../models/registry/ConfigEntityRegistry.java | 64 ++++- .../models/registry/EntityRegistry.java | 8 + .../models/registry/MergedEntityRegistry.java | 19 +- .../models/registry/PatchEntityRegistry.java | 44 +++- .../registry/PluginEntityRegistryLoader.java | 20 +- .../StructuredPropertiesSoftDeleteTest.java | 23 +- .../plugins/hooks/MCLSideEffectTest.java | 37 +-- .../plugins/hooks/MCPSideEffectTest.java | 37 +-- .../plugins/hooks/MutationPluginTest.java | 37 +-- .../validation/ValidatorPluginTest.java | 64 ++--- .../CreateIfNotExistsValidatorTest.java | 8 +- .../PropertyDefinitionValidatorTest.java | 33 +-- .../models/OpenApiSpecBuilderTest.java | 2 +- .../registry/PatchEntityRegistryTest.java | 6 +- .../PluginEntityRegistryLoaderTest.java | 6 +- .../java/custom-plugin-lib/build.gradle | 116 +++++++++ 
metadata-io/build.gradle | 1 + metadata-io/metadata-io-api/README.txt | 4 + metadata-io/metadata-io-api/build.gradle | 11 + .../metadata/entity/EntityApiUtils.java | 60 +++++ .../metadata/entity/EntityAspect.java | 12 +- .../entity/ebean/batch/AspectsBatchImpl.java | 0 .../entity/ebean/batch/ChangeItemImpl.java | 25 +- .../entity/ebean/batch/DeleteItemImpl.java | 10 +- .../entity/ebean/batch/MCLItemImpl.java | 13 +- .../entity/ebean/batch/PatchItemImpl.java | 6 +- .../EntityRegistryUrnValidator.java | 0 .../validation/RecordTemplateValidator.java | 0 .../entity/validation/ValidationApiUtils.java | 124 ++++++++++ .../validation/ValidationException.java | 0 .../aspect/utils/DefaultAspectsUtil.java | 4 +- .../entity/EntityAspectIdentifier.java | 10 + .../metadata/entity/EntityServiceImpl.java | 15 +- .../linkedin/metadata/entity/EntityUtils.java | 43 +--- .../entity/cassandra/CassandraAspectDao.java | 2 +- .../entity/validation/ValidationUtils.java | 116 +-------- .../metadata/entity/EntityServiceTest.java | 44 ++-- .../metadata/entity/ValidationUtilsTest.java | 14 +- .../EntityChangeEventGeneratorHookTest.java | 3 - ...LSpringTest.java => MCLGMSSpringTest.java} | 8 +- .../kafka/hook/spring/MCLMAESpringTest.java | 56 +++++ ... 
=> MCLSpringCommonTestConfiguration.java} | 5 +- .../spring/MCLSpringGMSTestConfiguration.java | 8 + metadata-models-custom/README.md | 69 +++++- metadata-models-custom/build.gradle | 21 +- .../registry/entity-registry.yaml | 11 + .../CustomDataQualityRulesMCLSideEffect.java | 16 +- .../CustomDataQualityRulesMCPSideEffect.java | 16 +- .../hooks/CustomDataQualityRulesMutator.java | 16 +- .../CustomDataQualityRulesConfig.java | 12 + .../CustomDataQualityRulesValidator.java | 111 +++++++++ .../CustomDataQualityRulesValidator.java | 16 +- .../ConfigEntityRegistryFactory.java | 16 +- .../PluginEntityRegistryFactory.java | 14 +- .../OpenAPIEntityTestConfiguration.java | 2 +- .../v2/controller/EntityController.java | 24 +- .../openapi/v3/OpenAPIV3Generator.java | 44 +++- metadata-service/plugin/build.gradle | 2 + .../metadata/aspect/SpringPluginFactory.java | 132 +++++++++++ .../resources/entity/EntityResource.java | 1 + settings.gradle | 2 + 78 files changed, 1494 insertions(+), 553 deletions(-) create mode 100644 metadata-integration/java/custom-plugin-lib/build.gradle create mode 100644 metadata-io/metadata-io-api/README.txt create mode 100644 metadata-io/metadata-io-api/build.gradle create mode 100644 metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java rename metadata-io/{ => metadata-io-api}/src/main/java/com/linkedin/metadata/entity/EntityAspect.java (92%) rename metadata-io/{ => metadata-io-api}/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java (100%) rename metadata-io/{ => metadata-io-api}/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java (91%) rename metadata-io/{ => metadata-io-api}/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java (90%) rename metadata-io/{ => metadata-io-api}/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java (92%) rename metadata-io/{ => 
metadata-io-api}/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java (97%) rename metadata-io/{ => metadata-io-api}/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java (100%) rename metadata-io/{ => metadata-io-api}/src/main/java/com/linkedin/metadata/entity/validation/RecordTemplateValidator.java (100%) create mode 100644 metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java rename metadata-io/{ => metadata-io-api}/src/main/java/com/linkedin/metadata/entity/validation/ValidationException.java (100%) rename metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/{MCLSpringTest.java => MCLGMSSpringTest.java} (91%) create mode 100644 metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java rename metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/{MCLSpringTestConfiguration.java => MCLSpringCommonTestConfiguration.java} (96%) create mode 100644 metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringGMSTestConfiguration.java create mode 100644 metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesConfig.java create mode 100644 metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesValidator.java create mode 100644 metadata-service/plugin/src/main/java/com/datahub/plugins/metadata/aspect/SpringPluginFactory.java diff --git a/build.gradle b/build.gradle index 58e7a2ba3a8d6..5264c1c58313c 100644 --- a/build.gradle +++ b/build.gradle @@ -272,7 +272,7 @@ project.ext.externalDependency = [ 'jetbrains':' org.jetbrains.kotlin:kotlin-stdlib:1.6.0', 'annotationApi': 'javax.annotation:javax.annotation-api:1.3.2', 'jakartaAnnotationApi': 'jakarta.annotation:jakarta.annotation-api:3.0.0', - 'classGraph': 
'io.github.classgraph:classgraph:4.8.168', + 'classGraph': 'io.github.classgraph:classgraph:4.8.172', ] allprojects { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java index 6bda333256a4c..7dd12d62765c6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java @@ -14,7 +14,7 @@ import com.linkedin.datahub.graphql.generated.SearchSuggestion; import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; -import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.utils.SearchUtils; import java.net.URISyntaxException; @@ -89,7 +89,7 @@ public static List getMatchedFieldEntry( if (SearchUtils.isUrn(field.getValue())) { try { Urn urn = Urn.createFromString(field.getValue()); - ValidationUtils.validateUrn( + ValidationApiUtils.validateUrn( context.getOperationContext().getEntityRegistry(), urn); matchedField.setEntity(UrnToEntityMapper.map(context, urn)); } catch (IllegalArgumentException | URISyntaxException e) { diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mappers/MapperUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mappers/MapperUtilsTest.java index 927d5185a71c7..6d3291736f571 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mappers/MapperUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mappers/MapperUtilsTest.java @@ -9,7 +9,7 @@ import 
com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.MatchedField; -import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.snapshot.Snapshot; @@ -42,7 +42,7 @@ public void testMatchedFieldValidation() throws URISyntaxException { "urn:li:dataset:%28urn:li:dataPlatform:s3%2Ctest-datalake-concepts/prog_maintenance%2CPROD%29"); assertThrows( IllegalArgumentException.class, - () -> ValidationUtils.validateUrn(entityRegistry, invalidUrn)); + () -> ValidationApiUtils.validateUrn(entityRegistry, invalidUrn)); QueryContext mockContext = mock(QueryContext.class); when(mockContext.getOperationContext()) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 998caf2565dcd..ba4708002ed21 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -21,6 +21,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes - #10419 - `aws_region` is now a required configuration in the DynamoDB connector. The connector will no longer loop through all AWS regions; instead, it will only use the region passed into the recipe configuration. +- #10389 - Custom validators, mutators, side-effects dropped a previously required constructor - #10472 - `RVW` added as a FabricType. No rollbacks allowed once metadata with this fabric type is added without manual cleanups in databases. 
### Potential Downtime diff --git a/entity-registry/build.gradle b/entity-registry/build.gradle index e9c9537483474..484a1f3271dbb 100644 --- a/entity-registry/build.gradle +++ b/entity-registry/build.gradle @@ -16,7 +16,7 @@ dependencies { implementation externalDependency.jacksonDataFormatYaml implementation externalDependency.reflections - implementation externalDependency.jsonPatch + api externalDependency.jsonPatch implementation externalDependency.jsonPathImpl constraints { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java index dbe700219946c..5efb1e8aebb06 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java @@ -13,11 +13,15 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +@Getter +@Setter +@Accessors(chain = true) public class StructuredPropertiesSoftDelete extends MutationHook { - public StructuredPropertiesSoftDelete(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } + @Nonnull private AspectPluginConfig config; @Override protected Stream> readMutation( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java index 66b362542ff7f..7fa1454691df9 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java @@ -16,6 +16,8 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import 
java.util.concurrent.ConcurrentHashMap; +import java.util.function.BiFunction; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -24,21 +26,13 @@ import javax.annotation.Nullable; import lombok.Getter; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.ArrayUtils; @Slf4j public class PluginFactory { - private static final String[] VALIDATOR_PACKAGES = { - "com.linkedin.metadata.aspect.plugins.validation", "com.linkedin.metadata.aspect.validation" - }; - private static final String[] HOOK_PACKAGES = { - "com.linkedin.metadata.aspect.plugins.hooks", "com.linkedin.metadata.aspect.hooks" - }; - public static PluginFactory withCustomClasspath( @Nullable PluginConfiguration pluginConfiguration, @Nonnull List classLoaders) { - return new PluginFactory(pluginConfiguration, classLoaders); + return new PluginFactory(pluginConfiguration, classLoaders).loadPlugins(); } public static PluginFactory withConfig(@Nullable PluginConfiguration pluginConfiguration) { @@ -49,44 +43,135 @@ public static PluginFactory empty() { return PluginFactory.withConfig(PluginConfiguration.EMPTY); } - public static PluginFactory merge(PluginFactory a, PluginFactory b) { - return PluginFactory.withCustomClasspath( - PluginConfiguration.merge(a.getPluginConfiguration(), b.getPluginConfiguration()), + public static PluginFactory merge( + PluginFactory a, + PluginFactory b, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) { + PluginConfiguration mergedPluginConfig = + PluginConfiguration.merge(a.pluginConfiguration, b.pluginConfiguration); + List mergedClassLoaders = Stream.concat(a.getClassLoaders().stream(), b.getClassLoaders().stream()) - .collect(Collectors.toList())); + .collect(Collectors.toList()); + + if (pluginFactoryProvider != null) { + return pluginFactoryProvider.apply(mergedPluginConfig, mergedClassLoaders); + } else { + return PluginFactory.withCustomClasspath(mergedPluginConfig, 
mergedClassLoaders); + } } @Getter private final PluginConfiguration pluginConfiguration; @Nonnull @Getter private final List classLoaders; - @Getter private final List aspectPayloadValidators; - @Getter private final List mutationHooks; - @Getter private final List mclSideEffects; - @Getter private final List mcpSideEffects; + @Getter private List aspectPayloadValidators; + @Getter private List mutationHooks; + @Getter private List mclSideEffects; + @Getter private List mcpSideEffects; - private final ClassGraph classGraph; + private static final Map> pluginCache = new ConcurrentHashMap<>(); public PluginFactory( @Nullable PluginConfiguration pluginConfiguration, @Nonnull List classLoaders) { - this.classGraph = - new ClassGraph() - .acceptPackages(ArrayUtils.addAll(HOOK_PACKAGES, VALIDATOR_PACKAGES)) - .enableRemoteJarScanning() - .enableExternalClasses() - .enableClassInfo() - .enableMethodInfo(); - this.classLoaders = classLoaders; - - if (!this.classLoaders.isEmpty()) { - classLoaders.forEach(this.classGraph::addClassLoader); - } - this.pluginConfiguration = pluginConfiguration == null ? PluginConfiguration.EMPTY : pluginConfiguration; + } + + public PluginFactory loadPlugins() { this.aspectPayloadValidators = buildAspectPayloadValidators(this.pluginConfiguration); this.mutationHooks = buildMutationHooks(this.pluginConfiguration); this.mclSideEffects = buildMCLSideEffects(this.pluginConfiguration); this.mcpSideEffects = buildMCPSideEffects(this.pluginConfiguration); + return this; + } + + /** + * Memory intensive operation because of the size of the jars. 
Limit packages, classes scanned, + * cache results + * + * @param configs plugin configurations + * @return auto-closeable scan result + */ + protected static List initPlugins( + @Nonnull List classLoaders, + @Nonnull Class baseClazz, + @Nonnull List packageNames, + @Nonnull List configs) { + + List classNames = + configs.stream().map(AspectPluginConfig::getClassName).collect(Collectors.toList()); + + if (classNames.isEmpty()) { + return Collections.emptyList(); + } else { + long key = + IntStream.concat( + classLoaders.stream().mapToInt(Object::hashCode), + IntStream.concat( + IntStream.of(baseClazz.getName().hashCode()), + configs.stream().mapToInt(AspectPluginConfig::hashCode))) + .sum(); + + return (List) + pluginCache.computeIfAbsent( + key, + k -> { + try { + ClassGraph classGraph = + new ClassGraph() + .acceptPackages(packageNames.stream().distinct().toArray(String[]::new)) + .acceptClasses(classNames.stream().distinct().toArray(String[]::new)) + .enableRemoteJarScanning() + .enableExternalClasses() + .enableClassInfo() + .enableMethodInfo(); + if (!classLoaders.isEmpty()) { + classLoaders.forEach(classGraph::addClassLoader); + } + + try (ScanResult scanResult = classGraph.scan()) { + Map classMap = + scanResult.getSubclasses(baseClazz).stream() + .collect(Collectors.toMap(ClassInfo::getName, Function.identity())); + + return configs.stream() + .map( + config -> { + try { + ClassInfo classInfo = classMap.get(config.getClassName()); + if (classInfo == null) { + throw new IllegalStateException( + String.format( + "The following class cannot be loaded: %s", + config.getClassName())); + } + MethodInfo constructorMethod = + classInfo.getConstructorInfo().get(0); + return ((T) + constructorMethod + .loadClassAndGetConstructor() + .newInstance()) + .setConfig(config); + } catch (Exception e) { + log.error( + "Error constructing entity registry plugin class: {}", + config.getClassName(), + e); + return Stream.empty(); + } + }) + .map(plugin -> (T) plugin) + 
.filter(PluginSpec::enabled) + .collect(Collectors.toList()); + } + } catch (Exception e) { + throw new IllegalArgumentException( + String.format( + "Failed to load entity registry plugins: %s.", baseClazz.getName()), + e); + } + }); + } } /** @@ -187,15 +272,18 @@ private List buildAspectPayloadValidators( : applyDisable( build( AspectPayloadValidator.class, - pluginConfiguration.getAspectPayloadValidators(), - VALIDATOR_PACKAGES)); + pluginConfiguration.validatorPackages(), + pluginConfiguration.getAspectPayloadValidators())); } private List buildMutationHooks(@Nullable PluginConfiguration pluginConfiguration) { return pluginConfiguration == null ? Collections.emptyList() : applyDisable( - build(MutationHook.class, pluginConfiguration.getMutationHooks(), HOOK_PACKAGES)); + build( + MutationHook.class, + pluginConfiguration.mutationPackages(), + pluginConfiguration.getMutationHooks())); } private List buildMCLSideEffects( @@ -203,7 +291,10 @@ private List buildMCLSideEffects( return pluginConfiguration == null ? Collections.emptyList() : applyDisable( - build(MCLSideEffect.class, pluginConfiguration.getMclSideEffects(), HOOK_PACKAGES)); + build( + MCLSideEffect.class, + pluginConfiguration.mclSideEffectPackages(), + pluginConfiguration.getMclSideEffects())); } private List buildMCPSideEffects( @@ -211,44 +302,37 @@ private List buildMCPSideEffects( return pluginConfiguration == null ? Collections.emptyList() : applyDisable( - build(MCPSideEffect.class, pluginConfiguration.getMcpSideEffects(), HOOK_PACKAGES)); + build( + MCPSideEffect.class, + pluginConfiguration.mcpSideEffectPackages(), + pluginConfiguration.getMcpSideEffects())); } - private List build( - Class baseClazz, List configs, String... 
packageNames) { - try (ScanResult scanResult = classGraph.acceptPackages(packageNames).scan()) { - - Map classMap = - scanResult.getSubclasses(baseClazz).stream() - .collect(Collectors.toMap(ClassInfo::getName, Function.identity())); - - return configs.stream() - .flatMap( - config -> { - try { - ClassInfo classInfo = classMap.get(config.getClassName()); - if (classInfo == null) { - throw new IllegalStateException( - String.format( - "The following class cannot be loaded: %s", config.getClassName())); - } - MethodInfo constructorMethod = classInfo.getConstructorInfo().get(0); - return Stream.of( - (T) constructorMethod.loadClassAndGetConstructor().newInstance(config)); - } catch (Exception e) { - log.error( - "Error constructing entity registry plugin class: {}", - config.getClassName(), - e); - return Stream.empty(); - } - }) - .collect(Collectors.toList()); + /** + * Load plugins given the base class (i.e. a validator) and the name of the implementing class + * found in the configuration objects. + * + *

For performance reasons, scan the packages found in packageNames + * + *

Designed to avoid any Spring dependency, see alternative implementation for Spring + * + * @param baseClazz base class for the plugin + * @param configs configuration with implementing class information + * @param packageNames package names to scan + * @return list of plugin instances + * @param the plugin class + */ + protected List build( + Class baseClazz, List packageNames, List configs) { + List nonSpringConfigs = + configs.stream() + .filter( + config -> + config.getSpring() == null + || Boolean.FALSE.equals(config.getSpring().isEnabled())) + .collect(Collectors.toList()); - } catch (Exception e) { - throw new IllegalArgumentException( - String.format("Failed to load entity registry plugins: %s.", baseClazz.getName()), e); - } + return initPlugins(classLoaders, baseClazz, packageNames, nonSpringConfigs); } @Nonnull diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java index 564fbf32e809f..1adb1be81ecc1 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java @@ -15,10 +15,13 @@ public abstract class PluginSpec { protected static String ENTITY_WILDCARD = "*"; - private final AspectPluginConfig aspectPluginConfig; + @Nonnull + public abstract AspectPluginConfig getConfig(); - protected AspectPluginConfig getConfig() { - return this.aspectPluginConfig; + public abstract PluginSpec setConfig(@Nonnull AspectPluginConfig config); + + public boolean enabled() { + return true; } public boolean shouldApply( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java index 8d9a8d6fc6a69..e10bdd98cd18a 100644 --- 
a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java @@ -16,21 +16,14 @@ @Builder public class AspectPluginConfig { @Nonnull private String className; + @Nullable private List packageScan; + private boolean enabled; @Nullable private List supportedOperations; @Nonnull private List supportedEntityAspectNames; - @Data - @NoArgsConstructor - @AllArgsConstructor - @Builder - public static class EntityAspectName { - public static final EntityAspectName ALL = new EntityAspectName("*", "*"); - - @Nonnull private String entityName; - @Nonnull private String aspectName; - } + @Nullable private SpringPluginConfig spring; @Nonnull public List getSupportedOperations() { @@ -47,6 +40,26 @@ public boolean isDisabledBy(AspectPluginConfig o) { return enabled && this.isEqualExcludingEnabled(o) && !o.enabled; } + @Data + @NoArgsConstructor + @AllArgsConstructor + @Builder + public static class EntityAspectName { + public static final EntityAspectName ALL = new EntityAspectName("*", "*"); + + @Nonnull private String entityName; + @Nonnull private String aspectName; + } + + @Data + @NoArgsConstructor + @AllArgsConstructor + @Builder + public static class SpringPluginConfig { + private boolean enabled; + @Nullable private String name; + } + private boolean isEqualExcludingEnabled(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; @@ -54,7 +67,9 @@ private boolean isEqualExcludingEnabled(Object o) { AspectPluginConfig that = (AspectPluginConfig) o; if (!className.equals(that.className)) return false; + if (!Objects.equals(packageScan, that.getPackageScan())) return false; if (!Objects.equals(supportedOperations, that.supportedOperations)) return false; + if (!Objects.equals(spring, that.spring)) return false; return supportedEntityAspectNames.equals(that.supportedEntityAspectNames); } } diff 
--git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/PluginConfiguration.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/PluginConfiguration.java index a2caab7be5f80..e9494c49a9efb 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/PluginConfiguration.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/PluginConfiguration.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.aspect.plugins.config; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; @@ -12,6 +13,13 @@ @AllArgsConstructor @NoArgsConstructor public class PluginConfiguration { + private static final String[] VALIDATOR_PACKAGES = { + "com.linkedin.metadata.aspect.plugins.validation", "com.linkedin.metadata.aspect.validation" + }; + private static final String[] HOOK_PACKAGES = { + "com.linkedin.metadata.aspect.plugins.hooks", "com.linkedin.metadata.aspect.hooks" + }; + private List aspectPayloadValidators = Collections.emptyList(); private List mutationHooks = Collections.emptyList(); private List mclSideEffects = Collections.emptyList(); @@ -31,4 +39,56 @@ public static PluginConfiguration merge(PluginConfiguration a, PluginConfigurati Stream.concat(a.getMcpSideEffects().stream(), b.getMcpSideEffects().stream()) .collect(Collectors.toList())); } + + public Stream streamAll() { + return Stream.concat( + Stream.concat( + Stream.concat(aspectPayloadValidators.stream(), mutationHooks.stream()), + mclSideEffects.stream()), + mcpSideEffects.stream()); + } + + public List validatorPackages() { + return aspectPayloadValidators.stream() + .flatMap( + cfg -> + cfg.getPackageScan() != null + ? 
cfg.getPackageScan().stream() + : Arrays.stream(VALIDATOR_PACKAGES)) + .distinct() + .collect(Collectors.toList()); + } + + public List mcpSideEffectPackages() { + return mcpSideEffects.stream() + .flatMap( + cfg -> + cfg.getPackageScan() != null + ? cfg.getPackageScan().stream() + : Arrays.stream(HOOK_PACKAGES)) + .distinct() + .collect(Collectors.toList()); + } + + public List mclSideEffectPackages() { + return mclSideEffects.stream() + .flatMap( + cfg -> + cfg.getPackageScan() != null + ? cfg.getPackageScan().stream() + : Arrays.stream(HOOK_PACKAGES)) + .distinct() + .collect(Collectors.toList()); + } + + public List mutationPackages() { + return mutationHooks.stream() + .flatMap( + cfg -> + cfg.getPackageScan() != null + ? cfg.getPackageScan().stream() + : Arrays.stream(HOOK_PACKAGES)) + .distinct() + .collect(Collectors.toList()); + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java index 902e928c13771..57016404648d5 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java @@ -3,7 +3,6 @@ import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.MCLItem; import com.linkedin.metadata.aspect.plugins.PluginSpec; -import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import java.util.Collection; import java.util.function.BiFunction; import java.util.stream.Collectors; @@ -14,10 +13,6 @@ public abstract class MCLSideEffect extends PluginSpec implements BiFunction, RetrieverContext, Stream> { - public MCLSideEffect(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } - /** * Given a list of MCLs, output additional MCLs * diff --git 
a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java index e8c703fa20717..845f967c0a528 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java @@ -3,7 +3,6 @@ import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.PluginSpec; -import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import java.util.Collection; import java.util.function.BiFunction; import java.util.stream.Collectors; @@ -14,10 +13,6 @@ public abstract class MCPSideEffect extends PluginSpec implements BiFunction, RetrieverContext, Stream> { - public MCPSideEffect(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } - /** * Given the list of MCP upserts, output additional upserts * diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java index 7ec6c7adfac46..c067954912a03 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java @@ -4,7 +4,6 @@ import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.PluginSpec; -import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.util.Pair; import java.util.Collection; import java.util.stream.Collectors; @@ -14,10 +13,6 @@ /** Applies changes to the RecordTemplate prior to write */ public abstract class MutationHook extends PluginSpec { - public 
MutationHook(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } - /** * Mutating hook, original objects are potentially modified. * diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java index fd03ca86d74a8..b39c38c2768a7 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java @@ -4,7 +4,6 @@ import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.PluginSpec; -import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import java.util.Collection; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -12,10 +11,6 @@ public abstract class AspectPayloadValidator extends PluginSpec { - public AspectPayloadValidator(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } - /** * Validate a proposal for the given change type for an aspect within the context of the given * entity's urn. 
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/CreateIfNotExistsValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/CreateIfNotExistsValidator.java index 47814bef26e9a..2ad885dc9fdd2 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/CreateIfNotExistsValidator.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/CreateIfNotExistsValidator.java @@ -16,13 +16,17 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; /** Common implementation of checking for create if not exists semantics. */ +@Setter +@Getter +@Accessors(chain = true) public class CreateIfNotExistsValidator extends AspectPayloadValidator { - public CreateIfNotExistsValidator(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } + @Nonnull private AspectPluginConfig config; @Override protected Stream validatePreCommitAspects( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java index 436c863ad048c..a4efc38d16082 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java @@ -31,12 +31,15 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +@Getter +@Setter +@Accessors(chain = true) public class PropertyDefinitionValidator extends AspectPayloadValidator { - - public PropertyDefinitionValidator(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); 
- } + private AspectPluginConfig config; /** * Prevent deletion of the definition or key aspect (only soft delete) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java index f0971ca35a88e..fcae6ca8cb71a 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java @@ -44,10 +44,16 @@ import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; import lombok.extern.slf4j.Slf4j; /** A Validator for StructuredProperties Aspect that is attached to entities like Datasets, etc. */ +@Setter +@Getter @Slf4j +@Accessors(chain = true) public class StructuredPropertiesValidator extends AspectPayloadValidator { private static final Set CHANGE_TYPES = ImmutableSet.of(ChangeType.CREATE, ChangeType.CREATE_ENTITY, ChangeType.UPSERT); @@ -60,10 +66,6 @@ public class StructuredPropertiesValidator extends AspectPayloadValidator { LogicalValueType.DATE, LogicalValueType.URN)); - public StructuredPropertiesValidator(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } - public static LogicalValueType getLogicalValueType(Urn valueType) { String valueTypeId = getValueTypeId(valueType); if (valueTypeId.equals("string")) { @@ -81,6 +83,8 @@ public static LogicalValueType getLogicalValueType(Urn valueType) { return LogicalValueType.UNKNOWN; } + @Nonnull private AspectPluginConfig config; + @Override protected Stream validateProposedAspects( @Nonnull Collection mcpItems, diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java 
b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java index 9aed29ab8595e..4238c333615ec 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java @@ -9,6 +9,7 @@ import com.linkedin.data.schema.DataSchema; import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.DataSchemaFactory; import com.linkedin.metadata.models.DefaultEntitySpec; @@ -33,9 +34,11 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.function.BiFunction; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.Getter; import lombok.extern.slf4j.Slf4j; @@ -48,6 +51,10 @@ public class ConfigEntityRegistry implements EntityRegistry { private final DataSchemaFactory dataSchemaFactory; @Getter private final PluginFactory pluginFactory; + + @Nullable + private BiFunction, PluginFactory> pluginFactoryProvider; + private final Map entityNameToSpec; private final Map eventNameToSpec; private final List entitySpecs; @@ -66,19 +73,27 @@ public class ConfigEntityRegistry implements EntityRegistry { .setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); } - public ConfigEntityRegistry(Pair configFileClassPathPair) throws IOException { + public ConfigEntityRegistry( + Pair configFileClassPathPair, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) + throws IOException { this( DataSchemaFactory.withCustomClasspath(configFileClassPathPair.getSecond()), 
DataSchemaFactory.getClassLoader(configFileClassPathPair.getSecond()) .map(Stream::of) .orElse(Stream.empty()) .collect(Collectors.toList()), - configFileClassPathPair.getFirst()); + configFileClassPathPair.getFirst(), + pluginFactoryProvider); } - public ConfigEntityRegistry(String entityRegistryRoot) + public ConfigEntityRegistry( + String entityRegistryRoot, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) throws EntityRegistryException, IOException { - this(getFileAndClassPath(entityRegistryRoot)); + this(getFileAndClassPath(entityRegistryRoot), pluginFactoryProvider); } private static Pair getFileAndClassPath(String entityRegistryRoot) @@ -117,24 +132,57 @@ private static Pair getFileAndClassPath(String entityRegistryRoot) } public ConfigEntityRegistry(InputStream configFileInputStream) { - this(DataSchemaFactory.getInstance(), Collections.emptyList(), configFileInputStream); + this(configFileInputStream, null); } public ConfigEntityRegistry( - DataSchemaFactory dataSchemaFactory, List classLoaders, Path configFilePath) + InputStream configFileInputStream, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) { + this( + DataSchemaFactory.getInstance(), + Collections.emptyList(), + configFileInputStream, + pluginFactoryProvider); + } + + public ConfigEntityRegistry( + DataSchemaFactory dataSchemaFactory, + List classLoaders, + Path configFilePath, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) throws FileNotFoundException { - this(dataSchemaFactory, classLoaders, new FileInputStream(configFilePath.toString())); + this( + dataSchemaFactory, + classLoaders, + new FileInputStream(configFilePath.toString()), + pluginFactoryProvider); } public ConfigEntityRegistry( DataSchemaFactory dataSchemaFactory, List classLoaders, InputStream configFileStream) { + this(dataSchemaFactory, classLoaders, configFileStream, null); + } + + public ConfigEntityRegistry( + DataSchemaFactory dataSchemaFactory, + List classLoaders, + InputStream 
configFileStream, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) { this.dataSchemaFactory = dataSchemaFactory; Entities entities; try { entities = OBJECT_MAPPER.readValue(configFileStream, Entities.class); - this.pluginFactory = PluginFactory.withCustomClasspath(entities.getPlugins(), classLoaders); + if (pluginFactoryProvider != null) { + this.pluginFactory = pluginFactoryProvider.apply(entities.getPlugins(), classLoaders); + } else { + this.pluginFactory = PluginFactory.withCustomClasspath(entities.getPlugins(), classLoaders); + } + this.pluginFactoryProvider = pluginFactoryProvider; } catch (IOException e) { throw new IllegalArgumentException( String.format( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java index 83f5ab08e9f19..405c848f53660 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java @@ -2,6 +2,7 @@ import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffect; import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect; import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; @@ -12,6 +13,7 @@ import com.linkedin.metadata.models.EventSpec; import java.util.List; import java.util.Map; +import java.util.function.BiFunction; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -135,4 +137,10 @@ default List getAllMCLSideEffects() { default PluginFactory getPluginFactory() { return PluginFactory.empty(); } + + @Nullable + default BiFunction, PluginFactory> + getPluginFactoryProvider() { + return null; + } } diff --git 
a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java index 6a733cc23f395..ac8e302ac5077 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java @@ -5,6 +5,7 @@ import com.linkedin.data.schema.compatibility.CompatibilityResult; import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.ConfigEntitySpec; import com.linkedin.metadata.models.DefaultEntitySpec; @@ -14,8 +15,10 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.BiFunction; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.Getter; import lombok.Setter; import lombok.extern.slf4j.Slf4j; @@ -28,7 +31,11 @@ public class MergedEntityRegistry implements EntityRegistry { private final Map eventNameToSpec; private final AspectTemplateEngine _aspectTemplateEngine; private final Map _aspectNameToSpec; - @Nonnull private PluginFactory pluginFactory; + + @Getter @Nonnull private PluginFactory pluginFactory; + + @Getter @Nullable + private BiFunction, PluginFactory> pluginFactoryProvider; public MergedEntityRegistry(EntityRegistry baseEntityRegistry) { // baseEntityRegistry.get*Specs() can return immutable Collections.emptyMap() which fails @@ -51,6 +58,7 @@ public MergedEntityRegistry(EntityRegistry baseEntityRegistry) { } else { this.pluginFactory = PluginFactory.empty(); } + this.pluginFactoryProvider = baseEntityRegistry.getPluginFactoryProvider(); } private void 
validateEntitySpec(EntitySpec entitySpec, final ValidationResult validationResult) { @@ -100,7 +108,8 @@ public MergedEntityRegistry apply(EntityRegistry patchEntityRegistry) // Merge Plugins this.pluginFactory = - PluginFactory.merge(this.pluginFactory, patchEntityRegistry.getPluginFactory()); + PluginFactory.merge( + this.pluginFactory, patchEntityRegistry.getPluginFactory(), this.pluginFactoryProvider); return this; } @@ -220,12 +229,6 @@ public AspectTemplateEngine getAspectTemplateEngine() { return _aspectTemplateEngine; } - @Nonnull - @Override - public PluginFactory getPluginFactory() { - return this.pluginFactory; - } - @Setter @Getter private static class ValidationResult { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java index b4fc4193e7263..7de040b03de72 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java @@ -9,6 +9,7 @@ import com.linkedin.data.schema.DataSchema; import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.DataSchemaFactory; import com.linkedin.metadata.models.EntitySpec; @@ -31,9 +32,11 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.function.BiFunction; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.apache.maven.artifact.versioning.ComparableVersion; @@ -48,6 +51,10 @@ public class PatchEntityRegistry implements 
EntityRegistry { private final DataSchemaFactory dataSchemaFactory; @Getter private final PluginFactory pluginFactory; + + @Getter @Nullable + private BiFunction, PluginFactory> pluginFactoryProvider; + private final Map entityNameToSpec; private final Map eventNameToSpec; private final Map _aspectNameToSpec; @@ -88,7 +95,9 @@ public String toString() { public PatchEntityRegistry( Pair configFileClassPathPair, String registryName, - ComparableVersion registryVersion) + ComparableVersion registryVersion, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) throws IOException, EntityRegistryException { this( DataSchemaFactory.withCustomClasspath(configFileClassPathPair.getSecond()), @@ -98,13 +107,22 @@ public PatchEntityRegistry( .collect(Collectors.toList()), configFileClassPathPair.getFirst(), registryName, - registryVersion); + registryVersion, + pluginFactoryProvider); } public PatchEntityRegistry( - String entityRegistryRoot, String registryName, ComparableVersion registryVersion) + String entityRegistryRoot, + String registryName, + ComparableVersion registryVersion, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) throws EntityRegistryException, IOException { - this(getFileAndClassPath(entityRegistryRoot), registryName, registryVersion); + this( + getFileAndClassPath(entityRegistryRoot), + registryName, + registryVersion, + pluginFactoryProvider); } private static Pair getFileAndClassPath(String entityRegistryRoot) @@ -147,14 +165,17 @@ public PatchEntityRegistry( List classLoaders, Path configFilePath, String registryName, - ComparableVersion registryVersion) + ComparableVersion registryVersion, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) throws FileNotFoundException, EntityRegistryException { this( dataSchemaFactory, classLoaders, new FileInputStream(configFilePath.toString()), registryName, - registryVersion); + registryVersion, + pluginFactoryProvider); } private PatchEntityRegistry( @@ -162,7 +183,9 @@ 
private PatchEntityRegistry( List classLoaders, InputStream configFileStream, String registryName, - ComparableVersion registryVersion) + ComparableVersion registryVersion, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) throws EntityRegistryException { this.dataSchemaFactory = dataSchemaFactory; this.registryName = registryName; @@ -171,7 +194,12 @@ private PatchEntityRegistry( Entities entities; try { entities = OBJECT_MAPPER.readValue(configFileStream, Entities.class); - this.pluginFactory = PluginFactory.withCustomClasspath(entities.getPlugins(), classLoaders); + if (pluginFactoryProvider != null) { + this.pluginFactory = pluginFactoryProvider.apply(entities.getPlugins(), classLoaders); + } else { + this.pluginFactory = PluginFactory.withCustomClasspath(entities.getPlugins(), classLoaders); + } + this.pluginFactoryProvider = pluginFactoryProvider; } catch (IOException e) { log.error("Unable to read Patch configuration.", e); throw new IllegalArgumentException( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoader.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoader.java index 09b33ad0f596e..4f2e5a106ae79 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoader.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoader.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.models.registry; +import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.models.registry.config.EntityRegistryLoadResult; import com.linkedin.metadata.models.registry.config.LoadStatus; import com.linkedin.util.Pair; @@ -19,7 +21,9 @@ import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import 
java.util.function.BiFunction; import java.util.stream.Collectors; +import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; import org.apache.maven.artifact.versioning.ComparableVersion; @@ -33,13 +37,22 @@ public class PluginEntityRegistryLoader { private final Map>> patchRegistries; private MergedEntityRegistry mergedEntityRegistry; + + @Nullable + private final BiFunction, PluginFactory> + pluginFactoryProvider; + private boolean started = false; private final Lock lock = new ReentrantLock(); private final Condition initialized = lock.newCondition(); private boolean booted = false; private final ScheduledExecutorService executorService = Executors.newScheduledThreadPool(1); - public PluginEntityRegistryLoader(String pluginDirectory, int loadDelaySeconds) { + public PluginEntityRegistryLoader( + String pluginDirectory, + int loadDelaySeconds, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) { File directory = new File(pluginDirectory); if (!directory.exists() || !directory.isDirectory()) { log.warn( @@ -52,6 +65,7 @@ public PluginEntityRegistryLoader(String pluginDirectory, int loadDelaySeconds) this.pluginDirectory = pluginDirectory; this.patchRegistries = new HashMap<>(); this.loadDelaySeconds = loadDelaySeconds; + this.pluginFactoryProvider = pluginFactoryProvider; } public Map>> @@ -180,7 +194,9 @@ private void loadOneRegistry( EntityRegistryLoadResult.builder().registryLocation(patchDirectory); EntityRegistry entityRegistry = null; try { - entityRegistry = new PatchEntityRegistry(patchDirectory, registryName, registryVersion); + entityRegistry = + new PatchEntityRegistry( + patchDirectory, registryName, registryVersion, pluginFactoryProvider); parentRegistry.apply(entityRegistry); loadResultBuilder.loadResult(LoadStatus.SUCCESS); diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java 
b/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java index 363a9d01c95bc..e1e84f5728540 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java @@ -52,17 +52,18 @@ public void testSoftDeleteFilter() throws URISyntaxException, CloneNotSupportedE .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(0.0))); StructuredPropertiesSoftDelete testHook = - new StructuredPropertiesSoftDelete( - AspectPluginConfig.builder() - .enabled(true) - .className(StructuredPropertiesSoftDelete.class.getName()) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName(DATASET_ENTITY_NAME) - .aspectName(Constants.STRUCTURED_PROPERTIES_ASPECT_NAME) - .build())) - .build()); + new StructuredPropertiesSoftDelete() + .setConfig( + AspectPluginConfig.builder() + .enabled(true) + .className(StructuredPropertiesSoftDelete.class.getName()) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(DATASET_ENTITY_NAME) + .aspectName(Constants.STRUCTURED_PROPERTIES_ASPECT_NAME) + .build())) + .build()); StructuredProperties expectedAllValues = new StructuredProperties(); expectedAllValues.setProperties( diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java index e1de7cf87ee18..60bbdba16374a 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java @@ -14,6 +14,9 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; 
+import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -41,25 +44,27 @@ public void testCustomMCLSideEffect() { assertEquals( mclSideEffects, List.of( - new TestMCLSideEffect( - AspectPluginConfig.builder() - .className( - "com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffectTest$TestMCLSideEffect") - .supportedOperations(List.of("UPSERT")) - .enabled(true) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName("chart") - .aspectName("chartInfo") - .build())) - .build()))); + new TestMCLSideEffect() + .setConfig( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffectTest$TestMCLSideEffect") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("chart") + .aspectName("chartInfo") + .build())) + .build()))); } + @Getter + @Setter + @Accessors(chain = true) public static class TestMCLSideEffect extends MCLSideEffect { - public TestMCLSideEffect(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } + public AspectPluginConfig config; @Override protected Stream applyMCLSideEffect( diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java index 70b8a2fe6de43..8e877d1d23aad 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java @@ -14,6 +14,9 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; import 
org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -41,26 +44,28 @@ public void testCustomMCPSideEffect() { assertEquals( mcpSideEffects, List.of( - new MCPSideEffectTest.TestMCPSideEffect( - AspectPluginConfig.builder() - .className( - "com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffectTest$TestMCPSideEffect") - .supportedOperations(List.of("UPSERT")) - .enabled(true) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName("dataset") - .aspectName("datasetKey") - .build())) - .build()))); + new MCPSideEffectTest.TestMCPSideEffect() + .setConfig( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffectTest$TestMCPSideEffect") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("dataset") + .aspectName("datasetKey") + .build())) + .build()))); } + @Getter + @Setter + @Accessors(chain = true) public static class TestMCPSideEffect extends MCPSideEffect { - public TestMCPSideEffect(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } + public AspectPluginConfig config; @Override protected Stream applyMCPSideEffect( diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java index 16ea003582b18..9722f64ec82a0 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java @@ -9,6 +9,9 @@ import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import java.util.List; import java.util.stream.Collectors; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; import 
org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -36,24 +39,26 @@ public void testCustomMutator() { assertEquals( mutators, List.of( - new TestMutator( - AspectPluginConfig.builder() - .className( - "com.linkedin.metadata.aspect.plugins.hooks.MutationPluginTest$TestMutator") - .supportedOperations(List.of("UPSERT")) - .enabled(true) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName("*") - .aspectName("schemaMetadata") - .build())) - .build()))); + new TestMutator() + .setConfig( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.hooks.MutationPluginTest$TestMutator") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("*") + .aspectName("schemaMetadata") + .build())) + .build()))); } + @Getter + @Setter + @Accessors(chain = true) public static class TestMutator extends MutationHook { - public TestMutator(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } + public AspectPluginConfig config; } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java index 879464d332169..2667467b39e19 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java @@ -15,6 +15,9 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -42,39 +45,42 @@ public void testCustomValidator() { assertEquals( validators, 
List.of( - new TestValidator( - AspectPluginConfig.builder() - .className( - "com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator") - .supportedOperations(List.of("UPSERT")) - .enabled(true) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName("*") - .aspectName("status") - .build())) - .build()), - new TestValidator( - AspectPluginConfig.builder() - .className( - "com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator") - .supportedOperations(List.of("UPSERT")) - .enabled(true) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName("chart") - .aspectName("status") - .build())) - .build()))); + new TestValidator() + .setConfig( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("*") + .aspectName("status") + .build())) + .build()), + new TestValidator() + .setConfig( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("chart") + .aspectName("status") + .build())) + .build()))); } + @Getter + @Setter + @Accessors(chain = true) public static class TestValidator extends AspectPayloadValidator { - public TestValidator(AspectPluginConfig config) { - super(config); - } + public AspectPluginConfig config; @Override protected Stream validateProposedAspects( diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/CreateIfNotExistsValidatorTest.java 
b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/CreateIfNotExistsValidatorTest.java index 5ccc9ceb8d02c..c201c2b11925a 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/CreateIfNotExistsValidatorTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/CreateIfNotExistsValidatorTest.java @@ -51,7 +51,7 @@ public void init() { @Test public void testCreateIfEntityNotExistsSuccess() { - CreateIfNotExistsValidator test = new CreateIfNotExistsValidator(validatorConfig); + CreateIfNotExistsValidator test = new CreateIfNotExistsValidator().setConfig(validatorConfig); Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); Set exceptions = @@ -87,7 +87,7 @@ public void testCreateIfEntityNotExistsSuccess() { @Test public void testCreateIfEntityNotExistsFail() { - CreateIfNotExistsValidator test = new CreateIfNotExistsValidator(validatorConfig); + CreateIfNotExistsValidator test = new CreateIfNotExistsValidator().setConfig(validatorConfig); Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); ChangeMCP testItem = @@ -114,7 +114,7 @@ public void testCreateIfEntityNotExistsFail() { @Test public void testCreateIfNotExistsSuccess() { - CreateIfNotExistsValidator test = new CreateIfNotExistsValidator(validatorConfig); + CreateIfNotExistsValidator test = new CreateIfNotExistsValidator().setConfig(validatorConfig); Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); Set exceptions = @@ -138,7 +138,7 @@ public void testCreateIfNotExistsSuccess() { @Test public void testCreateIfNotExistsFail() { - CreateIfNotExistsValidator test = new CreateIfNotExistsValidator(validatorConfig); + CreateIfNotExistsValidator test = new CreateIfNotExistsValidator().setConfig(validatorConfig); Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); SystemAspect mockSystemAspect = mock(SystemAspect.class); diff --git 
a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java index e654bb5002afc..841cbf5a77bec 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java @@ -293,22 +293,23 @@ public void testCanChangeAllowedValueDescriptions() @Test public void testHardDeleteBlock() { PropertyDefinitionValidator test = - new PropertyDefinitionValidator( - AspectPluginConfig.builder() - .enabled(true) - .className(PropertyDefinitionValidator.class.getName()) - .supportedOperations(List.of("DELETE")) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) - .aspectName(Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) - .build(), - AspectPluginConfig.EntityAspectName.builder() - .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) - .aspectName("structuredPropertyKey") - .build())) - .build()); + new PropertyDefinitionValidator() + .setConfig( + AspectPluginConfig.builder() + .enabled(true) + .className(PropertyDefinitionValidator.class.getName()) + .supportedOperations(List.of("DELETE")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) + .aspectName(Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) + .build(), + AspectPluginConfig.EntityAspectName.builder() + .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) + .aspectName("structuredPropertyKey") + .build())) + .build()); assertEquals( test.validateProposed( diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/OpenApiSpecBuilderTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/OpenApiSpecBuilderTest.java 
index 8589bc1639f5c..3f00cfeac7fb9 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/models/OpenApiSpecBuilderTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/OpenApiSpecBuilderTest.java @@ -98,7 +98,7 @@ public void testOpenApiSpecBuilder() throws Exception { new ConfigEntityRegistry( TestEntityProfile.class.getClassLoader().getResourceAsStream("entity-registry.yml")); MergedEntityRegistry er = new MergedEntityRegistry(configEntityRegistry); - new PluginEntityRegistryLoader(TestConstants.BASE_DIRECTORY, 1) + new PluginEntityRegistryLoader(TestConstants.BASE_DIRECTORY, 1, null) .withBaseRegistry(er) .start(true); diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PatchEntityRegistryTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PatchEntityRegistryTest.java index 27227f133ab55..dd9f6a56428e0 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PatchEntityRegistryTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PatchEntityRegistryTest.java @@ -22,7 +22,8 @@ public void testEntityRegistryLoad() throws Exception, EntityRegistryException { + "/" + TestConstants.TEST_VERSION.toString(), TestConstants.TEST_REGISTRY, - TestConstants.TEST_VERSION); + TestConstants.TEST_VERSION, + null); Map entitySpecs = patchEntityRegistry.getEntitySpecs(); assertEquals(entitySpecs.values().size(), 1); @@ -64,7 +65,8 @@ public void testEntityRegistryWithKeyLoad() throws Exception, EntityRegistryExce DataSchemaFactory.getClassLoader(pluginLocation).stream().toList(), Paths.get("src/test_plugins/mycompany-full-model/0.0.1/entity-registry.yaml"), TestConstants.TEST_REGISTRY, - TestConstants.TEST_VERSION); + TestConstants.TEST_VERSION, + null); Map entitySpecs = patchEntityRegistry.getEntitySpecs(); assertEquals(entitySpecs.values().size(), 1); diff --git 
a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoaderTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoaderTest.java index 13582696bde03..47c29405a774e 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoaderTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoaderTest.java @@ -76,7 +76,7 @@ public AspectTemplateEngine getAspectTemplateEngine() { MergedEntityRegistry configEntityRegistry = new MergedEntityRegistry(baseEntityRegistry); PluginEntityRegistryLoader pluginEntityRegistryLoader = - new PluginEntityRegistryLoader(TestConstants.BASE_DIRECTORY, 60) + new PluginEntityRegistryLoader(TestConstants.BASE_DIRECTORY, 60, null) .withBaseRegistry(configEntityRegistry) .start(true); assertEquals(pluginEntityRegistryLoader.getPatchRegistries().size(), 1); @@ -171,7 +171,7 @@ public void testEntityRegistryWithGoodBase() throws FileNotFoundException, Inter MergedEntityRegistry mergedEntityRegistry = new MergedEntityRegistry(getBaseEntityRegistry()); PluginEntityRegistryLoader pluginEntityRegistryLoader = - new PluginEntityRegistryLoader(BASE_DIRECTORY, 60) + new PluginEntityRegistryLoader(BASE_DIRECTORY, 60, null) .withBaseRegistry(mergedEntityRegistry) .start(true); assertEquals(pluginEntityRegistryLoader.getPatchRegistries().size(), 1); @@ -216,7 +216,7 @@ public void testEntityRegistryVersioning() throws InterruptedException { String multiversionPluginDir = "src/test_plugins/"; PluginEntityRegistryLoader pluginEntityRegistryLoader = - new PluginEntityRegistryLoader(multiversionPluginDir, 60) + new PluginEntityRegistryLoader(multiversionPluginDir, 60, null) .withBaseRegistry(mergedEntityRegistry) .start(true); Map>> diff --git a/metadata-integration/java/custom-plugin-lib/build.gradle b/metadata-integration/java/custom-plugin-lib/build.gradle new file mode 100644 index 
0000000000000..9fbe1066706be --- /dev/null +++ b/metadata-integration/java/custom-plugin-lib/build.gradle @@ -0,0 +1,116 @@ +plugins { + id 'java-library' + id 'com.github.johnrengelman.shadow' + id 'signing' + id 'io.codearte.nexus-staging' + id 'maven-publish' +} + +apply from: "../versioning.gradle" + +jar.enabled = false // Since we only want to build shadow jars, disabling the regular jar creation + +// only include since required registry file +processResources { + from("${project(':metadata-models').projectDir}/src/main/resources/entity-registry.yml") +} + +dependencies { + // Required for custom code plugins + api(project(':entity-registry')) { + // only include dataTemplate (and resources/entity-registry.yml from above) + exclude module: 'metadata-models' + } + implementation project(path: ':metadata-models', configuration: 'dataTemplate') + + // Required for MCL/MCP hooks + implementation(project(':metadata-io:metadata-io-api')) { + transitive = false + } + + // utility classes + implementation(project(':metadata-utils')) { + transitive = false + } +} + +configurations.all { + exclude group: 'org.antlr' +} + +shadowJar { + zip64 = true + archiveClassifier = '' + // preventing java multi-release JAR leakage + // https://github.com/johnrengelman/shadow/issues/729 + exclude('module-info.class', 'META-INF/versions/**', + '**/LICENSE', '**/LICENSE*.txt', '**/NOTICE', '**/NOTICE.txt', 'licenses/**', 'log4j2.*', 'log4j.*') + relocate 'com.fasterxml.jackson', 'datahub.shaded.jackson' + mergeServiceFiles() +} + +publishing { + publications { + shadow(MavenPublication) { publication -> + project.shadow.component(publication) + pom { + name = 'DataHub Custom Plugin Dependency' + group = 'io.acryl' + artifactId = 'datahub-custom-plugin-lib' + description = 'DataHub Java Custom Plugin dependencies' + url = 'https://datahubproject.io' + artifacts = [shadowJar] + + scm { + connection = 'scm:git:git://github.com/datahub-project/datahub.git' + developerConnection = 
'scm:git:ssh://github.com:datahub-project/datahub.git' + url = 'https://github.com/datahub-project/datahub.git' + } + + licenses { + license { + name = 'The Apache License, Version 2.0' + url = 'http://www.apache.org/licenses/LICENSE-2.0.txt' + } + } + + developers { + developer { + id = 'datahub' + name = 'Datahub' + email = 'datahub@acryl.io' + } + } + } + } + } + + repositories { +/* maven { + def releasesRepoUrl = "https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/" + def snapshotsRepoUrl = "https://s01.oss.sonatype.org/content/repositories/snapshots/" + def ossrhUsername = System.getenv('RELEASE_USERNAME') + def ossrhPassword = System.getenv('RELEASE_PASSWORD') + credentials { + username ossrhUsername + password ossrhPassword + } + url = version.endsWith('SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl + }*/ + } +} + +signing { + required { gradle.taskGraph.hasTask("publish") } + def signingKey = findProperty("signingKey") + def signingPassword = System.getenv("SIGNING_PASSWORD") + useInMemoryPgpKeys(signingKey, signingPassword) + sign publishing.publications.shadow +} + +nexusStaging { + serverUrl = "https://s01.oss.sonatype.org/service/local/" + //required only for projects registered in Sonatype after 2021-02-24 + username = System.getenv("NEXUS_USERNAME") + password = System.getenv("NEXUS_PASSWORD") +} \ No newline at end of file diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 49d7610dc3c89..5bd73c844b380 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -10,6 +10,7 @@ configurations { dependencies { implementation project(':entity-registry') implementation project(':metadata-service:auth-config') + api project(':metadata-io:metadata-io-api') api project(':metadata-utils') api project(':metadata-events:mxe-avro') api project(':metadata-events:mxe-registration') diff --git a/metadata-io/metadata-io-api/README.txt b/metadata-io/metadata-io-api/README.txt new file mode 100644 index 
0000000000000..a9d52d55341a8 --- /dev/null +++ b/metadata-io/metadata-io-api/README.txt @@ -0,0 +1,4 @@ +# :metadata-io:metadata-io-api + +This module exists in order to isolate dependencies when used in external projects. For example, +a custom plugin implementing a custom validator, mutator, or side-effect. \ No newline at end of file diff --git a/metadata-io/metadata-io-api/build.gradle b/metadata-io/metadata-io-api/build.gradle new file mode 100644 index 0000000000000..bd79e8cb3ddef --- /dev/null +++ b/metadata-io/metadata-io-api/build.gradle @@ -0,0 +1,11 @@ +plugins { + id 'java-library' +} + +dependencies { + implementation project(':entity-registry') + implementation project(':metadata-service:services') + implementation project(':metadata-utils') + compileOnly externalDependency.lombok + annotationProcessor externalDependency.lombok +} diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java new file mode 100644 index 0000000000000..656534e24f551 --- /dev/null +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java @@ -0,0 +1,60 @@ +package com.linkedin.metadata.entity; + +import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID; + +import com.datahub.util.RecordUtils; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.metadata.utils.PegasusUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; 
+import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class EntityApiUtils { + + private EntityApiUtils() {} + + @Nonnull + public static String toJsonAspect(@Nonnull final RecordTemplate aspectRecord) { + return RecordUtils.toJsonString(aspectRecord); + } + + public static RecordTemplate buildKeyAspect( + @Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { + final EntitySpec spec = entityRegistry.getEntitySpec(PegasusUtils.urnToEntityName(urn)); + final AspectSpec keySpec = spec.getKeyAspectSpec(); + return EntityKeyUtils.convertUrnToEntityKey(urn, keySpec); + } + + public static SystemMetadata parseSystemMetadata(String jsonSystemMetadata) { + if (jsonSystemMetadata == null || jsonSystemMetadata.equals("")) { + SystemMetadata response = new SystemMetadata(); + response.setRunId(DEFAULT_RUN_ID); + response.setLastObserved(0); + return response; + } + return RecordUtils.toRecordTemplate(SystemMetadata.class, jsonSystemMetadata); + } + + public static MetadataChangeProposal buildMCP( + Urn entityUrn, String aspectName, ChangeType changeType, @Nullable T aspect) { + MetadataChangeProposal proposal = new MetadataChangeProposal(); + proposal.setEntityUrn(entityUrn); + proposal.setChangeType(changeType); + proposal.setEntityType(entityUrn.getEntityType()); + proposal.setAspectName(aspectName); + if (aspect != null) { + proposal.setAspect(GenericRecordUtils.serializeAspect(aspect)); + } + return proposal; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityAspect.java similarity index 92% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityAspect.java index ae1b3007ed647..cba770d841b94 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java +++ 
b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityAspect.java @@ -1,7 +1,5 @@ package com.linkedin.metadata.entity; -import static com.linkedin.metadata.entity.EntityUtils.parseSystemMetadata; - import com.datahub.util.RecordUtils; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -53,10 +51,6 @@ public class EntityAspect { private String createdFor; - public EntityAspectIdentifier getAspectIdentifier() { - return new EntityAspectIdentifier(getUrn(), getAspect(), getVersion()); - } - /** * Provide a typed EntityAspect without breaking the existing public contract with generic types. */ @@ -110,11 +104,7 @@ public long getVersion() { @Nullable public SystemMetadata getSystemMetadata() { - return parseSystemMetadata(getSystemMetadataRaw()); - } - - public EntityAspectIdentifier getAspectIdentifier() { - return entityAspect.getAspectIdentifier(); + return EntityApiUtils.parseSystemMetadata(getSystemMetadataRaw()); } /** diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java similarity index 91% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java index a11e01a56c96f..d6c12f2dffc91 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java @@ -1,7 +1,5 @@ package com.linkedin.metadata.entity.ebean.batch; -import static com.linkedin.metadata.entity.AspectUtils.validateAspect; - import com.datahub.util.exception.ModelConversionException; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -12,9 +10,10 @@ import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.patch.template.common.GenericPatchTemplate; +import com.linkedin.metadata.entity.AspectUtils; +import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.EntityAspect; -import com.linkedin.metadata.entity.EntityUtils; -import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.utils.EntityKeyUtils; @@ -89,12 +88,12 @@ public static ChangeItemImpl fromPatch( public SystemAspect getSystemAspect(@Nullable Long version) { EntityAspect entityAspect = new EntityAspect(); entityAspect.setAspect(getAspectName()); - entityAspect.setMetadata(EntityUtils.toJsonAspect(getRecordTemplate())); + entityAspect.setMetadata(EntityApiUtils.toJsonAspect(getRecordTemplate())); entityAspect.setUrn(getUrn().toString()); entityAspect.setVersion(version == null ? 
getNextAspectVersion() : version); entityAspect.setCreatedOn(new Timestamp(getAuditStamp().getTime())); entityAspect.setCreatedBy(getAuditStamp().getActor().toString()); - entityAspect.setSystemMetadata(EntityUtils.toJsonAspect(getSystemMetadata())); + entityAspect.setSystemMetadata(EntityApiUtils.toJsonAspect(getSystemMetadata())); return EntityAspect.EntitySystemAspect.builder() .build(getEntitySpec(), getAspectSpec(), entityAspect); } @@ -128,16 +127,16 @@ public ChangeItemImpl build(AspectRetriever aspectRetriever) { // Apply change type default this.changeType = validateOrDefaultChangeType(changeType); - ValidationUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); + ValidationApiUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); log.debug("entity type = {}", this.urn.getEntityType()); entitySpec(aspectRetriever.getEntityRegistry().getEntitySpec(this.urn.getEntityType())); log.debug("entity spec = {}", this.entitySpec); - aspectSpec(ValidationUtils.validate(this.entitySpec, this.aspectName)); + aspectSpec(ValidationApiUtils.validate(this.entitySpec, this.aspectName)); log.debug("aspect spec = {}", this.aspectSpec); - ValidationUtils.validateRecordTemplate( + ValidationApiUtils.validateRecordTemplate( this.entitySpec, this.urn, this.recordTemplate, aspectRetriever); return new ChangeItemImpl( @@ -160,7 +159,7 @@ public static ChangeItemImpl build( log.debug("entity type = {}", mcp.getEntityType()); EntitySpec entitySpec = aspectRetriever.getEntityRegistry().getEntitySpec(mcp.getEntityType()); - AspectSpec aspectSpec = validateAspect(mcp, entitySpec); + AspectSpec aspectSpec = AspectUtils.validateAspect(mcp, entitySpec); if (!MCPItem.isValidChangeType(ChangeType.UPSERT, aspectSpec)) { throw new UnsupportedOperationException( @@ -190,9 +189,9 @@ public static ChangeItemImpl build( // specific to impl, other impls support PATCH, etc private static ChangeType validateOrDefaultChangeType(@Nullable ChangeType changeType) { final 
ChangeType finalChangeType = changeType == null ? ChangeType.UPSERT : changeType; - if (!CHANGE_TYPES.contains(finalChangeType)) { + if (!MCPItem.CHANGE_TYPES.contains(finalChangeType)) { throw new IllegalArgumentException( - String.format("ChangeType %s not in %s", changeType, CHANGE_TYPES)); + String.format("ChangeType %s not in %s", changeType, MCPItem.CHANGE_TYPES)); } return finalChangeType; } @@ -204,7 +203,7 @@ private static RecordTemplate convertToRecordTemplate( aspect = GenericRecordUtils.deserializeAspect( mcp.getAspect().getValue(), mcp.getAspect().getContentType(), aspectSpec); - ValidationUtils.validateOrThrow(aspect); + ValidationApiUtils.validateOrThrow(aspect); } catch (ModelConversionException e) { throw new RuntimeException( String.format( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java similarity index 90% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java index 0ab854198a282..9c1ded284fa0b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java @@ -7,9 +7,9 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.EntityAspect; -import com.linkedin.metadata.entity.EntityUtils; -import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.models.AspectSpec; import 
com.linkedin.metadata.models.EntitySpec; import com.linkedin.mxe.MetadataChangeProposal; @@ -63,7 +63,7 @@ public SystemMetadata getSystemMetadata() { @Nullable @Override public MetadataChangeProposal getMetadataChangeProposal() { - return EntityUtils.buildMCP(getUrn(), aspectName, getChangeType(), null); + return EntityApiUtils.buildMCP(getUrn(), aspectName, getChangeType(), null); } @Nonnull @@ -96,13 +96,13 @@ private DeleteItemImpl build() { @SneakyThrows public DeleteItemImpl build(AspectRetriever aspectRetriever) { - ValidationUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); + ValidationApiUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); log.debug("entity type = {}", this.urn.getEntityType()); entitySpec(aspectRetriever.getEntityRegistry().getEntitySpec(this.urn.getEntityType())); log.debug("entity spec = {}", this.entitySpec); - aspectSpec(ValidationUtils.validate(this.entitySpec, this.aspectName)); + aspectSpec(ValidationApiUtils.validate(this.entitySpec, this.aspectName)); log.debug("aspect spec = {}", this.aspectSpec); return new DeleteItemImpl( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java similarity index 92% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java index 6efc1e78b543c..94d60d2f67c9c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java @@ -7,7 +7,7 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.MCLItem; import com.linkedin.metadata.entity.AspectUtils; -import 
com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -63,20 +63,21 @@ public MCLItemImpl build(AspectRetriever aspectRetriever) { EntityKeyUtils.getUrnFromLog( this.metadataChangeLog, this.entitySpec.getKeyAspectSpec()); } - ValidationUtils.validateUrn(entityRegistry, urn); + ValidationApiUtils.validateUrn(entityRegistry, urn); log.debug("entity type = {}", urn.getEntityType()); entitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); log.debug("entity spec = {}", this.entitySpec); - aspectSpec(ValidationUtils.validate(this.entitySpec, this.metadataChangeLog.getAspectName())); + aspectSpec( + ValidationApiUtils.validate(this.entitySpec, this.metadataChangeLog.getAspectName())); log.debug("aspect spec = {}", this.aspectSpec); Pair aspects = convertToRecordTemplate(this.metadataChangeLog, aspectSpec); // validate new - ValidationUtils.validateRecordTemplate( + ValidationApiUtils.validateRecordTemplate( this.entitySpec, urn, aspects.getFirst(), aspectRetriever); return new MCLItemImpl( @@ -107,7 +108,7 @@ private static Pair convertToRecordTemplate( aspect = GenericRecordUtils.deserializeAspect( mcl.getAspect().getValue(), mcl.getAspect().getContentType(), aspectSpec); - ValidationUtils.validateOrThrow(aspect); + ValidationApiUtils.validateOrThrow(aspect); } else { aspect = null; } @@ -118,7 +119,7 @@ private static Pair convertToRecordTemplate( mcl.getPreviousAspectValue().getValue(), mcl.getPreviousAspectValue().getContentType(), aspectSpec); - ValidationUtils.validateOrThrow(prevAspect); + ValidationApiUtils.validateOrThrow(prevAspect); } else { prevAspect = null; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java 
b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java similarity index 97% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java index 0efa45d121f2a..f4473c8db3148 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java @@ -16,7 +16,7 @@ import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.batch.PatchMCP; import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; -import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -122,13 +122,13 @@ public PatchItemImpl.PatchItemImplBuilder systemMetadata(SystemMetadata systemMe } public PatchItemImpl build(EntityRegistry entityRegistry) { - ValidationUtils.validateUrn(entityRegistry, this.urn); + ValidationApiUtils.validateUrn(entityRegistry, this.urn); log.debug("entity type = {}", this.urn.getEntityType()); entitySpec(entityRegistry.getEntitySpec(this.urn.getEntityType())); log.debug("entity spec = {}", this.entitySpec); - aspectSpec(ValidationUtils.validate(this.entitySpec, this.aspectName)); + aspectSpec(ValidationApiUtils.validate(this.entitySpec, this.aspectName)); log.debug("aspect spec = {}", this.aspectSpec); if (this.patch == null) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java similarity index 100% rename 
from metadata-io/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/RecordTemplateValidator.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/RecordTemplateValidator.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/validation/RecordTemplateValidator.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/RecordTemplateValidator.java diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java new file mode 100644 index 0000000000000..ed79f23823a84 --- /dev/null +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java @@ -0,0 +1,124 @@ +package com.linkedin.metadata.entity.validation; + +import com.linkedin.common.urn.Urn; +import com.linkedin.data.schema.validation.ValidationResult; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.entity.EntityApiUtils; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import java.net.URISyntaxException; +import java.net.URLEncoder; +import java.util.function.Consumer; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class ValidationApiUtils { + public static final int URN_NUM_BYTES_LIMIT = 512; + public static final String URN_DELIMITER_SEPARATOR = "␟"; + + /** + * Validates a {@link 
RecordTemplate} and throws {@link ValidationException} if validation fails. + * + * @param record record to be validated. + */ + public static void validateOrThrow(RecordTemplate record) { + RecordTemplateValidator.validate( + record, + validationResult -> { + throw new ValidationException( + String.format( + "Failed to validate record with class %s: %s", + record.getClass().getName(), validationResult.getMessages().toString())); + }); + } + + public static void validateUrn(@Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { + EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); + validator.setCurrentEntitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); + RecordTemplateValidator.validate( + EntityApiUtils.buildKeyAspect(entityRegistry, urn), + validationResult -> { + throw new IllegalArgumentException( + "Invalid urn: " + urn + "\n Cause: " + validationResult.getMessages()); + }, + validator); + + if (urn.toString().trim().length() != urn.toString().length()) { + throw new IllegalArgumentException( + "Error: cannot provide an URN with leading or trailing whitespace"); + } + if (URLEncoder.encode(urn.toString()).length() > URN_NUM_BYTES_LIMIT) { + throw new IllegalArgumentException( + "Error: cannot provide an URN longer than " + + Integer.toString(URN_NUM_BYTES_LIMIT) + + " bytes (when URL encoded)"); + } + if (urn.toString().contains(URN_DELIMITER_SEPARATOR)) { + throw new IllegalArgumentException( + "Error: URN cannot contain " + URN_DELIMITER_SEPARATOR + " character"); + } + try { + Urn.createFromString(urn.toString()); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + /** + * Validates a {@link RecordTemplate} and logs a warning if validation fails. + * + * @param record record to be validated.
+ */ + public static void validateOrWarn(RecordTemplate record) { + RecordTemplateValidator.validate( + record, + validationResult -> { + log.warn(String.format("Failed to validate record %s against its schema.", record)); + }); + } + + public static AspectSpec validate(EntitySpec entitySpec, String aspectName) { + if (aspectName == null || aspectName.isEmpty()) { + throw new UnsupportedOperationException( + "Aspect name is required for create and update operations"); + } + + AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName); + + if (aspectSpec == null) { + throw new RuntimeException( + String.format("Unknown aspect %s for entity %s", aspectName, entitySpec.getName())); + } + + return aspectSpec; + } + + public static void validateRecordTemplate( + EntitySpec entitySpec, + Urn urn, + @Nullable RecordTemplate aspect, + @Nonnull AspectRetriever aspectRetriever) { + EntityRegistry entityRegistry = aspectRetriever.getEntityRegistry(); + EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); + validator.setCurrentEntitySpec(entitySpec); + Consumer resultFunction = + validationResult -> { + throw new IllegalArgumentException( + "Invalid format for aspect: " + + entitySpec.getName() + + "\n Cause: " + + validationResult.getMessages()); + }; + + RecordTemplateValidator.validate( + EntityApiUtils.buildKeyAspect(entityRegistry, urn), resultFunction, validator); + + if (aspect != null) { + RecordTemplateValidator.validate(aspect, resultFunction, validator); + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationException.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationException.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationException.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationException.java diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java index 5413fb8382d9d..21bac3cbb0e61 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java @@ -20,8 +20,8 @@ import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.EntityUtils; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -157,7 +157,7 @@ private static List> generateDefaultAspects( // Key Aspect final String keyAspectName = opContext.getKeyAspectName(urn); defaultAspects.add( - Pair.of(keyAspectName, EntityUtils.buildKeyAspect(opContext.getEntityRegistry(), urn))); + Pair.of(keyAspectName, EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), urn))); // Other Aspects defaultAspects.addAll( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspectIdentifier.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspectIdentifier.java index 887bd3910310d..e4b12c706ce28 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspectIdentifier.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspectIdentifier.java @@ -27,4 +27,14 @@ public static EntityAspectIdentifier fromCassandra(CassandraAspect cassandraAspe return new EntityAspectIdentifier( cassandraAspect.getUrn(), cassandraAspect.getAspect(), cassandraAspect.getVersion()); } + + public static EntityAspectIdentifier 
fromEntityAspect(EntityAspect entityAspect) { + return new EntityAspectIdentifier( + entityAspect.getUrn(), entityAspect.getAspect(), entityAspect.getVersion()); + } + + public static EntityAspectIdentifier fromSystemEntityAspect( + EntityAspect.EntitySystemAspect systemAspect) { + return fromEntityAspect(systemAspect.getEntityAspect()); + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index ef4724d4a4094..01ed02ae848ef 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -227,7 +227,7 @@ public Map> getLatestAspects( .forEach( key -> { final RecordTemplate keyAspect = - EntityUtils.buildKeyAspect(opContext.getEntityRegistry(), key); + EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), key); urnToAspects.get(key).add(keyAspect); }); @@ -1046,7 +1046,7 @@ private Stream ingestTimeseriesProposal( .auditStamp(item.getAuditStamp()) .systemMetadata(item.getSystemMetadata()) .recordTemplate( - EntityUtils.buildKeyAspect( + EntityApiUtils.buildKeyAspect( opContext.getEntityRegistry(), item.getUrn())) .build(opContext.getRetrieverContext().get().getAspectRetriever())) .collect(Collectors.toList()); @@ -1438,7 +1438,7 @@ private RestoreIndicesResult restoreIndices( .aspectSpec(entitySpec.getKeyAspectSpec()) .auditStamp(auditStamp) .systemMetadata(latestSystemMetadata) - .recordTemplate(EntityUtils.buildKeyAspect(opContext.getEntityRegistry(), urn)) + .recordTemplate(EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), urn)) .build(opContext.getRetrieverContext().get().getAspectRetriever())); Stream defaultAspectsResult = ingestProposalSync( @@ -2278,7 +2278,8 @@ private Map getEnvelopedAspects( .collect( Collectors.toMap( systemAspect -> - ((EntityAspect.EntitySystemAspect) 
systemAspect).getAspectIdentifier(), + EntityAspectIdentifier.fromSystemEntityAspect( + (EntityAspect.EntitySystemAspect) systemAspect), systemAspect -> ((EntityAspect.EntitySystemAspect) systemAspect).toEnvelopedAspects())); } @@ -2334,13 +2335,13 @@ private UpdateAspectResult ingestAspectToLocalDB( // 4. Save the newValue as the latest version log.debug("Ingesting aspect with name {}, urn {}", aspectName, urn); - String newValueStr = EntityUtils.toJsonAspect(newValue); + String newValueStr = EntityApiUtils.toJsonAspect(newValue); long versionOfOld = aspectDao.saveLatestAspect( tx, urn.toString(), aspectName, - latest == null ? null : EntityUtils.toJsonAspect(oldValue), + latest == null ? null : EntityApiUtils.toJsonAspect(oldValue), latest == null ? null : latest.getCreatedBy(), latest == null ? null : latest.getEntityAspect().getCreatedFor(), latest == null ? null : latest.getCreatedOn(), @@ -2349,7 +2350,7 @@ private UpdateAspectResult ingestAspectToLocalDB( auditStamp.getActor().toString(), auditStamp.hasImpersonator() ? 
auditStamp.getImpersonator().toString() : null, new Timestamp(auditStamp.getTime()), - EntityUtils.toJsonAspect(providedSystemMetadata), + EntityApiUtils.toJsonAspect(providedSystemMetadata), nextVersion); // metrics diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java index 701cde1b4ef8a..e542b10af4ddc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java @@ -1,7 +1,6 @@ package com.linkedin.metadata.entity; import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; import com.datahub.util.RecordUtils; import com.google.common.base.Preconditions; @@ -14,7 +13,6 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; -import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.ReadItem; import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.SystemAspect; @@ -27,10 +25,8 @@ import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.metadata.utils.EntityKeyUtils; -import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.PegasusUtils; import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; @@ -48,11 +44,6 @@ public class EntityUtils { private EntityUtils() {} - @Nonnull - public static String toJsonAspect(@Nonnull final RecordTemplate aspectRecord) { - return RecordUtils.toJsonString(aspectRecord); - } - @Nullable public static Urn getUrnFromString(String urnStr) { try { @@ -120,13 +111,6 @@ public 
static RecordTemplate getAspectFromEntity( } } - public static RecordTemplate buildKeyAspect( - @Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { - final EntitySpec spec = entityRegistry.getEntitySpec(urnToEntityName(urn)); - final AspectSpec keySpec = spec.getKeyAspectSpec(); - return EntityKeyUtils.convertUrnToEntityKey(urn, keySpec); - } - static Entity toEntity(@Nonnull final Snapshot snapshot) { return new Entity().setValue(snapshot); } @@ -163,7 +147,7 @@ static EntityResponse toEntityResponse( final Urn urn, final List envelopedAspects) { final EntityResponse response = new EntityResponse(); response.setUrn(urn); - response.setEntityName(urnToEntityName(urn)); + response.setEntityName(PegasusUtils.urnToEntityName(urn)); response.setAspects( new EnvelopedAspectMap( envelopedAspects.stream() @@ -181,7 +165,7 @@ static EntityResponse toEntityResponse( public static Optional toSystemAspect( @Nonnull RetrieverContext retrieverContext, @Nullable EntityAspect entityAspect) { return Optional.ofNullable(entityAspect) - .map(aspect -> EntityUtils.toSystemAspects(retrieverContext, List.of(aspect))) + .map(aspect -> toSystemAspects(retrieverContext, List.of(aspect))) .filter(systemAspects -> !systemAspects.isEmpty()) .map(systemAspects -> systemAspects.get(0)); } @@ -294,27 +278,4 @@ public static List toSystemAspects( return systemAspects; } - - public static MetadataChangeProposal buildMCP( - Urn entityUrn, String aspectName, ChangeType changeType, @Nullable T aspect) { - MetadataChangeProposal proposal = new MetadataChangeProposal(); - proposal.setEntityUrn(entityUrn); - proposal.setChangeType(changeType); - proposal.setEntityType(entityUrn.getEntityType()); - proposal.setAspectName(aspectName); - if (aspect != null) { - proposal.setAspect(GenericRecordUtils.serializeAspect(aspect)); - } - return proposal; - } - - public static SystemMetadata parseSystemMetadata(String jsonSystemMetadata) { - if (jsonSystemMetadata == null || 
jsonSystemMetadata.equals("")) { - SystemMetadata response = new SystemMetadata(); - response.setRunId(DEFAULT_RUN_ID); - response.setLastObserved(0); - return response; - } - return RecordUtils.toRecordTemplate(SystemMetadata.class, jsonSystemMetadata); - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java index 71b9b9ad86f72..c1e9b4207def6 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java @@ -200,7 +200,7 @@ public Map batchGet( return keys.stream() .map(this::getAspect) .filter(Objects::nonNull) - .collect(Collectors.toMap(EntityAspect::getAspectIdentifier, aspect -> aspect)); + .collect(Collectors.toMap(EntityAspectIdentifier::fromEntityAspect, aspect -> aspect)); } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java index 1cb36568feacc..ddcc6b6599231 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java @@ -3,21 +3,14 @@ import com.codahale.metrics.Timer; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; -import com.linkedin.data.schema.validation.ValidationResult; import com.linkedin.data.template.AbstractArrayTemplate; -import com.linkedin.data.template.RecordTemplate; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultEntity; import com.linkedin.metadata.browse.BrowseResultEntityArray; import com.linkedin.metadata.entity.EntityService; -import 
com.linkedin.metadata.entity.EntityUtils; import com.linkedin.metadata.graph.EntityLineageResult; import com.linkedin.metadata.graph.LineageRelationship; import com.linkedin.metadata.graph.LineageRelationshipArray; -import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.ListResult; import com.linkedin.metadata.search.LineageScrollResult; import com.linkedin.metadata.search.LineageSearchEntity; @@ -29,11 +22,8 @@ import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.datahubproject.metadata.context.OperationContext; -import java.net.URISyntaxException; -import java.net.URLEncoder; import java.util.Objects; import java.util.Set; -import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -43,110 +33,6 @@ @Slf4j public class ValidationUtils { - public static final int URN_NUM_BYTES_LIMIT = 512; - public static final String URN_DELIMITER_SEPARATOR = "␟"; - - /** - * Validates a {@link RecordTemplate} and throws {@link ValidationException} if validation fails. - * - * @param record record to be validated. - */ - public static void validateOrThrow(RecordTemplate record) { - RecordTemplateValidator.validate( - record, - validationResult -> { - throw new ValidationException( - String.format( - "Failed to validate record with class %s: %s", - record.getClass().getName(), validationResult.getMessages().toString())); - }); - } - - /** - * Validates a {@link RecordTemplate} and logs a warning if validation fails. - * - * @param record record to be validated.ailure. 
- */ - public static void validateOrWarn(RecordTemplate record) { - RecordTemplateValidator.validate( - record, - validationResult -> { - log.warn(String.format("Failed to validate record %s against its schema.", record)); - }); - } - - public static AspectSpec validate(EntitySpec entitySpec, String aspectName) { - if (aspectName == null || aspectName.isEmpty()) { - throw new UnsupportedOperationException( - "Aspect name is required for create and update operations"); - } - - AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName); - - if (aspectSpec == null) { - throw new RuntimeException( - String.format("Unknown aspect %s for entity %s", aspectName, entitySpec.getName())); - } - - return aspectSpec; - } - - public static void validateRecordTemplate( - EntitySpec entitySpec, - Urn urn, - @Nullable RecordTemplate aspect, - @Nonnull AspectRetriever aspectRetriever) { - EntityRegistry entityRegistry = aspectRetriever.getEntityRegistry(); - EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); - validator.setCurrentEntitySpec(entitySpec); - Consumer resultFunction = - validationResult -> { - throw new IllegalArgumentException( - "Invalid format for aspect: " - + entitySpec.getName() - + "\n Cause: " - + validationResult.getMessages()); - }; - - RecordTemplateValidator.validate( - EntityUtils.buildKeyAspect(entityRegistry, urn), resultFunction, validator); - - if (aspect != null) { - RecordTemplateValidator.validate(aspect, resultFunction, validator); - } - } - - public static void validateUrn(@Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { - EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); - validator.setCurrentEntitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); - RecordTemplateValidator.validate( - EntityUtils.buildKeyAspect(entityRegistry, urn), - validationResult -> { - throw new IllegalArgumentException( - "Invalid urn: " + urn + "\n Cause: " + 
validationResult.getMessages()); - }, - validator); - - if (urn.toString().trim().length() != urn.toString().length()) { - throw new IllegalArgumentException( - "Error: cannot provide an URN with leading or trailing whitespace"); - } - if (URLEncoder.encode(urn.toString()).length() > URN_NUM_BYTES_LIMIT) { - throw new IllegalArgumentException( - "Error: cannot provide an URN longer than " - + Integer.toString(URN_NUM_BYTES_LIMIT) - + " bytes (when URL encoded)"); - } - if (urn.toString().contains(URN_DELIMITER_SEPARATOR)) { - throw new IllegalArgumentException( - "Error: URN cannot contain " + URN_DELIMITER_SEPARATOR + " character"); - } - try { - Urn.createFromString(urn.toString()); - } catch (URISyntaxException e) { - throw new IllegalArgumentException(e); - } - } public static SearchResult validateSearchResult( @Nonnull OperationContext opContext, @@ -407,7 +293,7 @@ private static Stream validateSearchUrns( .filter( urn -> { try { - validateUrn(opContext.getEntityRegistry(), urn); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), urn); return true; } catch (Exception e) { log.warn( diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 9875e7efab63a..feacc24423edb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -45,8 +45,8 @@ import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.entity.validation.ValidationException; -import com.linkedin.metadata.entity.validation.ValidationUtils; import com.linkedin.metadata.event.EventProducer; import 
com.linkedin.metadata.key.CorpUserKey; import com.linkedin.metadata.models.AspectSpec; @@ -988,7 +988,7 @@ public void testRollbackKey() throws AssertionError { CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); RecordTemplate writeKey1 = - EntityUtils.buildKeyAspect(opContext.getEntityRegistry(), entityUrn1); + EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), entityUrn1); // Ingest CorpUserInfo Aspect #1 Overwrite CorpUserInfo writeAspect1Overwrite = @@ -1075,7 +1075,7 @@ public void testRollbackUrn() throws AssertionError { CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); RecordTemplate writeKey1 = - EntityUtils.buildKeyAspect(opContext.getEntityRegistry(), entityUrn1); + EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), entityUrn1); // Ingest CorpUserInfo Aspect #2 CorpUserInfo writeAspect2 = AspectGenerationUtils.createCorpUserInfo("email2@test.com"); @@ -1246,10 +1246,10 @@ public void testIngestGetLatestAspect() throws AssertionError { assertTrue(DataTemplateUtil.areEqual(writeAspect2, readAspect2)); assertTrue( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata2)); + EntityApiUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata2)); assertTrue( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao1.getSystemMetadata()), metadata1)); + EntityApiUtils.parseSystemMetadata(readAspectDao1.getSystemMetadata()), metadata1)); verify(_mockProducer, times(1)) .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture()); @@ -1328,10 +1328,10 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { DataTemplateUtil.areEqual(writeAspect2, new CorpUserInfo(readAspect2.getValue().data()))); assertTrue( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata2)); + 
EntityApiUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata2)); assertTrue( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao1.getSystemMetadata()), metadata1)); + EntityApiUtils.parseSystemMetadata(readAspectDao1.getSystemMetadata()), metadata1)); verify(_mockProducer, times(2)) .produceMetadataChangeLog( @@ -1452,14 +1452,14 @@ public void testIngestSameAspect() throws AssertionError { assertTrue(DataTemplateUtil.areEqual(writeAspect2, readAspect2)); assertFalse( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata2)); + EntityApiUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata2)); assertFalse( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata1)); + EntityApiUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata1)); assertTrue( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata3)); + EntityApiUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata3)); verify(_mockProducer, times(0)) .produceMetadataChangeLog(Mockito.any(), Mockito.any(), Mockito.any()); @@ -1727,12 +1727,12 @@ public void testRestoreIndices() throws Exception { public void testValidateUrn() throws Exception { // Valid URN Urn validTestUrn = new Urn("li", "corpuser", new TupleKey("testKey")); - ValidationUtils.validateUrn(opContext.getEntityRegistry(), validTestUrn); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), validTestUrn); // URN with trailing whitespace Urn testUrnWithTrailingWhitespace = new Urn("li", "corpuser", new TupleKey("testKey ")); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), testUrnWithTrailingWhitespace); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), testUrnWithTrailingWhitespace); Assert.fail("Should have raised IllegalArgumentException for URN with trailing 
whitespace"); } catch (IllegalArgumentException e) { assertEquals( @@ -1744,7 +1744,7 @@ public void testValidateUrn() throws Exception { Urn testUrnTooLong = new Urn("li", "corpuser", new TupleKey(stringTooLong)); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), testUrnTooLong); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), testUrnTooLong); Assert.fail("Should have raised IllegalArgumentException for URN too long"); } catch (IllegalArgumentException e) { assertEquals( @@ -1763,9 +1763,9 @@ public void testValidateUrn() throws Exception { Urn testUrnSameLengthWhenEncoded = new Urn("li", "corpUser", new TupleKey(buildStringSameLengthWhenEncoded.toString())); // Same length when encoded should be allowed, the encoded one should not be - ValidationUtils.validateUrn(opContext.getEntityRegistry(), testUrnSameLengthWhenEncoded); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), testUrnSameLengthWhenEncoded); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), testUrnTooLongWhenEncoded); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), testUrnTooLongWhenEncoded); Assert.fail("Should have raised IllegalArgumentException for URN too long"); } catch (IllegalArgumentException e) { assertEquals( @@ -1775,9 +1775,9 @@ public void testValidateUrn() throws Exception { // Urn containing disallowed character Urn testUrnSpecialCharValid = new Urn("li", "corpUser", new TupleKey("bob␇")); Urn testUrnSpecialCharInvalid = new Urn("li", "corpUser", new TupleKey("bob␟")); - ValidationUtils.validateUrn(opContext.getEntityRegistry(), testUrnSpecialCharValid); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), testUrnSpecialCharValid); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), testUrnSpecialCharInvalid); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), testUrnSpecialCharInvalid); Assert.fail( "Should have raised IllegalArgumentException for URN containing the 
illegal char"); } catch (IllegalArgumentException e) { @@ -1786,7 +1786,7 @@ public void testValidateUrn() throws Exception { Urn urnWithMismatchedParens = new Urn("li", "corpuser", new TupleKey("test(Key")); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), urnWithMismatchedParens); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), urnWithMismatchedParens); Assert.fail("Should have raised IllegalArgumentException for URN with mismatched parens"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains("mismatched paren nesting")); @@ -1794,7 +1794,7 @@ public void testValidateUrn() throws Exception { Urn invalidType = new Urn("li", "fakeMadeUpType", new TupleKey("testKey")); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), invalidType); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), invalidType); Assert.fail( "Should have raised IllegalArgumentException for URN with non-existent entity type"); } catch (IllegalArgumentException e) { @@ -1803,12 +1803,12 @@ public void testValidateUrn() throws Exception { Urn validFabricType = new Urn("li", "dataset", new TupleKey("urn:li:dataPlatform:foo", "bar", "PROD")); - ValidationUtils.validateUrn(opContext.getEntityRegistry(), validFabricType); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), validFabricType); Urn invalidFabricType = new Urn("li", "dataset", new TupleKey("urn:li:dataPlatform:foo", "bar", "prod")); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), invalidFabricType); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), invalidFabricType); Assert.fail("Should have raised IllegalArgumentException for URN with invalid fabric type"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains(invalidFabricType.toString())); @@ -1817,7 +1817,7 @@ public void testValidateUrn() throws Exception { Urn urnEndingInComma = new Urn("li", "dataset", new 
TupleKey("urn:li:dataPlatform:foo", "bar", "PROD", "")); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), urnEndingInComma); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), urnEndingInComma); Assert.fail("Should have raised IllegalArgumentException for URN ending in comma"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains(urnEndingInComma.toString())); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/ValidationUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/ValidationUtilsTest.java index 17eae455aa4c4..f89d599ccc12a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/ValidationUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/ValidationUtilsTest.java @@ -12,8 +12,8 @@ import com.linkedin.data.DataMap; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.entity.validation.ValidationException; -import com.linkedin.metadata.entity.validation.ValidationUtils; import com.linkedin.metadata.key.DatasetKey; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -43,14 +43,14 @@ public void testValidateOrThrowThrowsOnMissingUnrecognizedField() { rawMap.put("removed", true); rawMap.put("extraField", 1); Status status = new Status(rawMap); - assertThrows(ValidationException.class, () -> ValidationUtils.validateOrThrow(status)); + assertThrows(ValidationException.class, () -> ValidationApiUtils.validateOrThrow(status)); } @Test public void testValidateOrThrowThrowsOnMissingRequiredField() { DataMap rawMap = new DataMap(); BrowsePath status = new BrowsePath(rawMap); - assertThrows(ValidationException.class, () -> ValidationUtils.validateOrThrow(status)); + assertThrows(ValidationException.class, () -> 
ValidationApiUtils.validateOrThrow(status)); } @Test @@ -59,14 +59,14 @@ public void testValidateOrThrowDoesNotThrowOnMissingOptionalField() throws Excep Owner owner = new Owner(rawMap); owner.setOwner(Urn.createFromString("urn:li:corpuser:test")); owner.setType(OwnershipType.DATAOWNER); - ValidationUtils.validateOrThrow(owner); + ValidationApiUtils.validateOrThrow(owner); } @Test public void testValidateOrThrowDoesNotThrowOnMissingDefaultField() { DataMap rawMap = new DataMap(); Status status = new Status(rawMap); - ValidationUtils.validateOrThrow(status); + ValidationApiUtils.validateOrThrow(status); } @Test @@ -75,7 +75,7 @@ public void testConvertEntityUrnToKeyUrlEncoded() throws URISyntaxException { Urn.createFromString( "urn:li:dataset:(urn:li:dataPlatform:s3,urn:li:dataset:%28urn:li:dataPlatform:s3%2Ctest-datalake-concepts/prog_maintenance%2CPROD%29,PROD)"); - ValidationUtils.validateUrn(entityRegistry, urn); + ValidationApiUtils.validateUrn(entityRegistry, urn); final AspectSpec keyAspectSpec = entityRegistry.getEntitySpec(urn.getEntityType()).getKeyAspectSpec(); @@ -94,6 +94,6 @@ public void testConvertEntityUrnToKeyUrlEncoded() throws URISyntaxException { "urn:li:dataset:%28urn:li:dataPlatform:s3%2Ctest-datalake-concepts/prog_maintenance%2CPROD%29"); assertThrows( IllegalArgumentException.class, - () -> ValidationUtils.validateUrn(entityRegistry, invalidUrn)); + () -> ValidationApiUtils.validateUrn(entityRegistry, invalidUrn)); } } diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java index e9f013c5a227f..b06b7df1846bd 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java +++ 
b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java @@ -45,7 +45,6 @@ import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.key.DatasetKey; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; @@ -90,14 +89,12 @@ public class EntityChangeEventGeneratorHookTest { private Urn actorUrn; private SystemEntityClient _mockClient; - private EntityService _mockEntityService; private EntityChangeEventGeneratorHook _entityChangeEventHook; @BeforeMethod public void setupTest() throws URISyntaxException { actorUrn = Urn.createFromString(TEST_ACTOR_URN); _mockClient = Mockito.mock(SystemEntityClient.class); - _mockEntityService = Mockito.mock(EntityService.class); EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry = createEntityChangeEventGeneratorRegistry(); _entityChangeEventHook = diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java similarity index 91% rename from metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTest.java rename to metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java index 0b0f7c7bce031..c2a8de161eafe 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java @@ -17,7 +17,11 @@ import org.testng.annotations.Test; @SpringBootTest( - classes = {MCLSpringTestConfiguration.class, ConfigurationProvider.class}, + classes = { + 
MCLSpringCommonTestConfiguration.class, + MCLSpringGMSTestConfiguration.class, + ConfigurationProvider.class + }, properties = { "ingestionScheduler.enabled=false", "configEntityRegistry.path=../../metadata-jobs/mae-consumer/src/test/resources/test-entity-registry.yml", @@ -27,7 +31,7 @@ locations = "classpath:/application.yaml", properties = {"MCL_CONSUMER_ENABLED=true"}) @EnableAutoConfiguration(exclude = {CassandraAutoConfiguration.class}) -public class MCLSpringTest extends AbstractTestNGSpringContextTests { +public class MCLGMSSpringTest extends AbstractTestNGSpringContextTests { @Test public void testHooks() { diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java new file mode 100644 index 0000000000000..23de7707cc571 --- /dev/null +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java @@ -0,0 +1,56 @@ +package com.linkedin.metadata.kafka.hook.spring; + +import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertTrue; + +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.kafka.MetadataChangeLogProcessor; +import com.linkedin.metadata.kafka.hook.UpdateIndicesHook; +import com.linkedin.metadata.kafka.hook.event.EntityChangeEventGeneratorHook; +import com.linkedin.metadata.kafka.hook.incident.IncidentsSummaryHook; +import com.linkedin.metadata.kafka.hook.ingestion.IngestionSchedulerHook; +import com.linkedin.metadata.kafka.hook.siblings.SiblingAssociationHook; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.autoconfigure.cassandra.CassandraAutoConfiguration; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.TestPropertySource; +import 
org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.Test; + +@SpringBootTest( + classes = {MCLSpringCommonTestConfiguration.class, ConfigurationProvider.class}, + properties = { + "entityClient.impl=restli", + "ingestionScheduler.enabled=false", + "configEntityRegistry.path=../../metadata-jobs/mae-consumer/src/test/resources/test-entity-registry.yml", + "kafka.schemaRegistry.type=INTERNAL" + }) +@TestPropertySource( + locations = "classpath:/application.yaml", + properties = {"MCL_CONSUMER_ENABLED=true"}) +@EnableAutoConfiguration(exclude = {CassandraAutoConfiguration.class}) +public class MCLMAESpringTest extends AbstractTestNGSpringContextTests { + + @Test + public void testHooks() { + MetadataChangeLogProcessor metadataChangeLogProcessor = + applicationContext.getBean(MetadataChangeLogProcessor.class); + assertTrue( + metadataChangeLogProcessor.getHooks().stream() + .noneMatch(hook -> hook instanceof IngestionSchedulerHook)); + assertTrue( + metadataChangeLogProcessor.getHooks().stream() + .anyMatch(hook -> hook instanceof UpdateIndicesHook)); + assertTrue( + metadataChangeLogProcessor.getHooks().stream() + .anyMatch(hook -> hook instanceof SiblingAssociationHook)); + assertTrue( + metadataChangeLogProcessor.getHooks().stream() + .anyMatch(hook -> hook instanceof EntityChangeEventGeneratorHook)); + assertEquals( + 1, + metadataChangeLogProcessor.getHooks().stream() + .filter(hook -> hook instanceof IncidentsSummaryHook) + .count()); + } +} diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java similarity index 96% rename from metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java rename to 
metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java index dac4e98b62513..2666f58de862e 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java @@ -7,7 +7,6 @@ import com.datahub.metadata.ingestion.IngestionScheduler; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.boot.kafka.DataHubUpgradeKafkaListener; -import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; @@ -39,7 +38,7 @@ "com.linkedin.gms.factory.entity.update.indices", "com.linkedin.gms.factory.timeline.eventgenerator" }) -public class MCLSpringTestConfiguration { +public class MCLSpringCommonTestConfiguration { @MockBean public EntityRegistry entityRegistry; @@ -58,8 +57,6 @@ public class MCLSpringTestConfiguration { @MockBean public ElasticSearchService searchService; - @MockBean public EntityService entityService; - @MockBean public FormService formService; @MockBean(name = "systemAuthentication") diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringGMSTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringGMSTestConfiguration.java new file mode 100644 index 0000000000000..b7c5433ae1592 --- /dev/null +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringGMSTestConfiguration.java @@ -0,0 +1,8 @@ +package com.linkedin.metadata.kafka.hook.spring; + +import com.linkedin.metadata.entity.EntityService; +import org.springframework.boot.test.mock.mockito.MockBean; + +public 
class MCLSpringGMSTestConfiguration { + @MockBean EntityService entityService; +} diff --git a/metadata-models-custom/README.md b/metadata-models-custom/README.md index d0274f2bc4e0e..1d26251bc13c9 100644 --- a/metadata-models-custom/README.md +++ b/metadata-models-custom/README.md @@ -48,7 +48,7 @@ Change your directory to the metadata-models-custom folder and then run this com This will create a zip file in the build/dist folder. Then change your directory back to the main datahub folder and run ``` -./gradlew :metadata-models-custom:install +./gradlew :metadata-models-custom:modelDeploy ``` This will install the zip file as a datahub plugin. It is installed at `~/.datahub/plugins/models/` and if you list the directory you should see the following path if you are following the customDataQualityRules implementation example: `~/.datahub/plugins/models/mycompany-dq-model/0.0.0-dev/` @@ -467,6 +467,73 @@ plugins: aspectName: customDataQualityRules ``` +#### Spring Support + +Validators, mutators, and side-effects can also utilize Spring to inject dependencies and autoconfigure them. While Spring is +not required, it is possible to use Spring to inject both autoconfiguration and the plugins themselves. An example Spring-enabled +validator has been included in the package `com.linkedin.metadata.aspect.plugins.spring.validation`. The plugin +class loader and Spring context are isolated so conflicts between DataHub and custom classes are avoided. + +The configuration of a Spring-enabled plugin looks like the following; note the addition of `spring.enabled: true` below. +A list of packages to scan for Spring configuration and components should also be provided, and it should include +your custom classes with Spring annotations, per the `packageScan` below.
+ +```yaml +plugins: + aspectPayloadValidators: + - className: 'com.linkedin.metadata.aspect.plugins.spring.validation.CustomDataQualityRulesValidator' + packageScan: + - com.linkedin.metadata.aspect.plugins.spring.validation + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: 'dataset' + aspectName: customDataQualityRules + spring: + enabled: true +``` + +In the Spring example, a configuration component called `CustomDataQualityRulesConfig` provides a string `Spring injection works!` demonstrating +injection of a bean into a function which is called by Spring after constructing the custom validator plugin. + +```java +@Configuration +public class CustomDataQualityRulesConfig { + @Bean("myCustomMessage") + public String myCustomMessage() { + return "Spring injection works!"; + } +} +``` + +```java +@Component +@Import(CustomDataQualityRulesConfig.class) +public class CustomDataQualityRulesValidator extends AspectPayloadValidator { + @Autowired + @Qualifier("myCustomMessage") + private String myCustomMessage; + + @PostConstruct + public void message() { + System.out.println(myCustomMessage); + } + + // ... +} +``` + +Example Log: + +``` +INFO c.l.m.m.r.PluginEntityRegistryLoader:187 - com.linkedin.metadata.models.registry.PluginEntityRegistryLoader@144e466d: Registry mycompany-dq-model:0.0.0-dev discovered. Loading... +INFO c.l.m.m.registry.PatchEntityRegistry:143 - Loading custom config entity file: /etc/datahub/plugins/models/mycompany-dq-model/0.0.0-dev/entity-registry.yaml, dir: /etc/datahub/plugins/models/mycompany-dq-model/0.0.0-dev +INFO c.l.m.m.registry.PatchEntityRegistry:143 - Loading custom config entity file: /etc/datahub/plugins/models/mycompany-dq-model/0.0.0-dev/entity-registry.yaml, dir: /etc/datahub/plugins/models/mycompany-dq-model/0.0.0-dev +Spring injection works! +``` + + ## The Future Hopefully this repository shows you how easily you can extend and customize DataHub's metadata model! 
diff --git a/metadata-models-custom/build.gradle b/metadata-models-custom/build.gradle index 8bf9d3b2f491e..412c19194c733 100644 --- a/metadata-models-custom/build.gradle +++ b/metadata-models-custom/build.gradle @@ -14,7 +14,7 @@ buildscript { } plugins { - id 'java-library' + id 'java' id 'maven-publish' id 'pegasus' } @@ -27,17 +27,14 @@ if (project.hasProperty('projVersion')) { dependencies { implementation spec.product.pegasus.data - // Uncomment these if you want to depend on models defined in core datahub - // DataQualityRuleEvent in this example uses Urn and TimeseriesAspectBase - implementation project(':li-utils') - dataModel project(':li-utils') - implementation project(':metadata-models') - dataModel project(':metadata-models') - - // Required for custom code plugins - implementation project(':entity-registry') - // Required for MCL/MCP hooks - implementation project (':metadata-io') + + // Core DataHub dependencies + implementation project(path: ':metadata-integration:java:custom-plugin-lib', configuration: 'shadow') + // DataModel DataHub dependencies + dataModel project(path: ':metadata-integration:java:custom-plugin-lib', configuration: 'shadow') + + // Required for Spring-enabled plugins only + implementation externalDependency.springBootAutoconfigure } def deployBaseDir = findProperty('pluginModelsDir') ?: file(project.gradle.gradleUserHomeDir.parent + "/.datahub/plugins/models") diff --git a/metadata-models-custom/registry/entity-registry.yaml b/metadata-models-custom/registry/entity-registry.yaml index e6180172837e0..70790bafb4ef5 100644 --- a/metadata-models-custom/registry/entity-registry.yaml +++ b/metadata-models-custom/registry/entity-registry.yaml @@ -16,6 +16,17 @@ plugins: supportedEntityAspectNames: - entityName: 'dataset' aspectName: customDataQualityRules + - className: 'com.linkedin.metadata.aspect.plugins.spring.validation.CustomDataQualityRulesValidator' + enabled: true + supportedOperations: + - UPSERT + 
supportedEntityAspectNames: + - entityName: 'dataset' + aspectName: customDataQualityRules + spring: + enabled: true + packageScan: + - com.linkedin.metadata.aspect.plugins.spring.validation mutationHooks: - className: 'com.linkedin.metadata.aspect.plugins.hooks.CustomDataQualityRulesMutator' enabled: true diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java index 7960ec5fa6322..e8783541c9916 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java @@ -14,9 +14,7 @@ public class CustomDataQualityRulesMCLSideEffect extends MCLSideEffect { - public CustomDataQualityRulesMCLSideEffect(AspectPluginConfig config) { - super(config); - } + private AspectPluginConfig config; @Override protected Stream applyMCLSideEffect( @@ -69,4 +67,16 @@ private Optional buildEvent(MetadataChangeLog originMCP) { return Optional.empty(); } + + @Nonnull + @Override + public AspectPluginConfig getConfig() { + return config; + } + + @Override + public CustomDataQualityRulesMCLSideEffect setConfig(@Nonnull AspectPluginConfig config) { + this.config = config; + return this; + } } diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java index 193c56d904e99..de9d3419c216e 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java +++ 
b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java @@ -12,9 +12,7 @@ public class CustomDataQualityRulesMCPSideEffect extends MCPSideEffect { - public CustomDataQualityRulesMCPSideEffect(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } + private AspectPluginConfig config; @Override protected Stream applyMCPSideEffect( @@ -34,4 +32,16 @@ protected Stream applyMCPSideEffect( .build(retrieverContext.getAspectRetriever()); }); } + + @Nonnull + @Override + public AspectPluginConfig getConfig() { + return config; + } + + @Override + public CustomDataQualityRulesMCPSideEffect setConfig(@Nonnull AspectPluginConfig config) { + this.config = config; + return this; + } } diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java index 9b48ed2c9975c..6b2f31a505c62 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java @@ -13,9 +13,7 @@ public class CustomDataQualityRulesMutator extends MutationHook { - public CustomDataQualityRulesMutator(AspectPluginConfig config) { - super(config); - } + private AspectPluginConfig config; @Override protected Stream> writeMutation( @@ -43,4 +41,16 @@ protected Stream> writeMutation( .filter(Objects::nonNull) .map(changeMCP -> Pair.of(changeMCP, true)); } + + @Nonnull + @Override + public AspectPluginConfig getConfig() { + return config; + } + + @Override + public CustomDataQualityRulesMutator setConfig(@Nonnull AspectPluginConfig config) { + this.config = config; + return this; + } } diff --git 
a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesConfig.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesConfig.java new file mode 100644 index 0000000000000..f6dbaa2c7fcbe --- /dev/null +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesConfig.java @@ -0,0 +1,12 @@ +package com.linkedin.metadata.aspect.plugins.spring.validation; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class CustomDataQualityRulesConfig { + @Bean("myCustomMessage") + public String myCustomMessage() { + return "Spring injection works!"; + } +} diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesValidator.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesValidator.java new file mode 100644 index 0000000000000..3cd1acaf9645d --- /dev/null +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesValidator.java @@ -0,0 +1,111 @@ +package com.linkedin.metadata.aspect.plugins.spring.validation; + +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.mycompany.dq.DataQualityRules; +import java.util.Collection; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import 
javax.annotation.Nonnull; +import javax.annotation.PostConstruct; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Import; +import org.springframework.stereotype.Component; + +/** + * Same as the non-Spring example however this is an example of using Spring to inject the plugins. + * + *

This also allows use of other Spring enabled libraries + */ +@Component +@Import(CustomDataQualityRulesConfig.class) +public class CustomDataQualityRulesValidator extends AspectPayloadValidator { + @Autowired + @Qualifier("myCustomMessage") + private String myCustomMessage; + + private AspectPluginConfig config; + + @PostConstruct + public void message() { + System.out.println(myCustomMessage); + } + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + + return mcpItems.stream() + .map( + item -> { + DataQualityRules rules = new DataQualityRules(item.getRecordTemplate().data()); + // Enforce at least 1 rule + return rules.getRules().isEmpty() + ? new AspectValidationException( + item.getUrn(), item.getAspectName(), "At least one rule is required.") + : null; + }) + .filter(Objects::nonNull); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return changeMCPs.stream() + .flatMap( + changeMCP -> { + if (changeMCP.getPreviousSystemAspect() != null) { + DataQualityRules oldRules = changeMCP.getPreviousAspect(DataQualityRules.class); + DataQualityRules newRules = changeMCP.getAspect(DataQualityRules.class); + + Map newFieldTypeMap = + newRules.getRules().stream() + .filter(rule -> rule.getField() != null) + .map(rule -> Map.entry(rule.getField(), rule.getType())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + // Ensure the old and new field type is the same + return oldRules.getRules().stream() + .map( + oldRule -> { + if (!newFieldTypeMap + .getOrDefault(oldRule.getField(), oldRule.getType()) + .equals(oldRule.getType())) { + return new AspectValidationException( + changeMCP.getUrn(), + changeMCP.getAspectName(), + String.format( + "Field type mismatch. 
Field: %s Old: %s New: %s", + oldRule.getField(), + oldRule.getType(), + newFieldTypeMap.get(oldRule.getField()))); + } + return null; + }) + .filter(Objects::nonNull); + } + + return Stream.empty(); + }); + } + + @Nonnull + @Override + public AspectPluginConfig getConfig() { + return config; + } + + @Override + public CustomDataQualityRulesValidator setConfig(@Nonnull AspectPluginConfig config) { + this.config = config; + return this; + } +} diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java index 7a785bfbd1abb..b95d3381d9c8f 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java @@ -14,9 +14,7 @@ public class CustomDataQualityRulesValidator extends AspectPayloadValidator { - public CustomDataQualityRulesValidator(AspectPluginConfig config) { - super(config); - } + private AspectPluginConfig config; @Override protected Stream validateProposedAspects( @@ -75,4 +73,16 @@ protected Stream validatePreCommitAspects( return Stream.empty(); }); } + + @Nonnull + @Override + public AspectPluginConfig getConfig() { + return config; + } + + @Override + public CustomDataQualityRulesValidator setConfig(@Nonnull AspectPluginConfig config) { + this.config = config; + return this; + } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java index f78dcf1b80156..f1518f9c8f9d7 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java @@ -1,10 +1,17 @@ package com.linkedin.gms.factory.entityregistry; +import com.datahub.plugins.metadata.aspect.SpringPluginFactory; +import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistryException; import java.io.IOException; +import java.util.List; +import java.util.function.BiFunction; import javax.annotation.Nonnull; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.ApplicationContext; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.core.io.Resource; @@ -12,6 +19,8 @@ @Configuration public class ConfigEntityRegistryFactory { + @Autowired private ApplicationContext applicationContext; + @Value("${configEntityRegistry.path}") private String entityRegistryConfigPath; @@ -21,10 +30,13 @@ public class ConfigEntityRegistryFactory { @Bean(name = "configEntityRegistry") @Nonnull protected ConfigEntityRegistry getInstance() throws IOException, EntityRegistryException { + BiFunction, PluginFactory> pluginFactoryProvider = + (config, loaders) -> new SpringPluginFactory(applicationContext, config, loaders); if (entityRegistryConfigPath != null) { - return new ConfigEntityRegistry(entityRegistryConfigPath); + return new ConfigEntityRegistry(entityRegistryConfigPath, pluginFactoryProvider); } else { - return new ConfigEntityRegistry(entityRegistryResource.getInputStream()); + return new ConfigEntityRegistry( + 
entityRegistryResource.getInputStream(), pluginFactoryProvider); } } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java index addc08246f1ae..0e9664cefdb7a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java @@ -1,10 +1,15 @@ package com.linkedin.gms.factory.entityregistry; +import com.datahub.plugins.metadata.aspect.SpringPluginFactory; import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.config.EntityRegistryPluginConfiguration; import com.linkedin.metadata.models.registry.PluginEntityRegistryLoader; import java.io.FileNotFoundException; import java.net.MalformedURLException; +import java.util.List; +import java.util.function.BiFunction; import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; @@ -18,11 +23,16 @@ public class PluginEntityRegistryFactory { @Bean(name = "pluginEntityRegistry") @Nonnull - protected PluginEntityRegistryLoader getInstance(ConfigurationProvider configurationProvider) + protected PluginEntityRegistryLoader getInstance( + @Nonnull final ConfigurationProvider configurationProvider) throws FileNotFoundException, MalformedURLException { EntityRegistryPluginConfiguration pluginConfiguration = configurationProvider.getDatahub().getPlugin().getEntityRegistry(); + BiFunction, PluginFactory> pluginFactoryProvider = + (config, loaders) -> new SpringPluginFactory(null, config, loaders); return new 
PluginEntityRegistryLoader( - pluginConfiguration.getPath(), pluginConfiguration.getLoadDelaySeconds()); + pluginConfiguration.getPath(), + pluginConfiguration.getLoadDelaySeconds(), + pluginFactoryProvider); } } diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java index 48481e8e5b576..3fba99a4b197f 100644 --- a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java +++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java @@ -90,7 +90,7 @@ public EntityRegistry entityRegistry() throws EntityRegistryException, Interrupt dependency. */ PluginEntityRegistryLoader custom = - new PluginEntityRegistryLoader(getClass().getResource("/custom-model").getFile(), 60); + new PluginEntityRegistryLoader(getClass().getResource("/custom-model").getFile(), 60, null); ConfigEntityRegistry standard = new ConfigEntityRegistry( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index b8166d50cc4d0..6c0da39d40ce9 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -27,8 +27,8 @@ import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.patch.GenericJsonPatch; import com.linkedin.metadata.aspect.patch.template.common.GenericPatchTemplate; +import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.EntityService; -import 
com.linkedin.metadata.entity.EntityUtils; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -450,7 +450,7 @@ public ResponseEntity createAspect( authentication, true); - AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName); + AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName); ChangeMCP upsert = toUpsertItem( opContext.getRetrieverContext().get().getAspectRetriever(), @@ -523,7 +523,7 @@ public ResponseEntity patchAspect( RecordTemplate currentValue = entityService.getAspect(opContext, urn, aspectName, 0); - AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName); + AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName); GenericPatchTemplate genericPatchTemplate = GenericPatchTemplate.builder() .genericJsonPatch(patch) @@ -642,7 +642,7 @@ private Map> toAspectMap( } private AspectSpec lookupAspectSpec(Urn urn, String aspectName) { - return entityRegistry.getEntitySpec(urn.getEntityType()).getAspectSpec(aspectName); + return lookupAspectSpec(entityRegistry.getEntitySpec(urn.getEntityType()), aspectName); } private RecordTemplate toRecordTemplate(AspectSpec aspectSpec, EnvelopedAspect envelopedAspect) { @@ -721,7 +721,7 @@ private AspectsBatch toMCPBatch( if (aspect.getValue().has("systemMetadata")) { builder.systemMetadata( - EntityUtils.parseSystemMetadata( + EntityApiUtils.parseSystemMetadata( objectMapper.writeValueAsString(aspect.getValue().get("systemMetadata")))); } @@ -761,4 +761,18 @@ public List toEntityListResponse( } return responseList; } + + /** + * Case-insensitive fallback + * + * @return + */ + private static AspectSpec lookupAspectSpec(EntitySpec entitySpec, String aspectName) { + return Optional.ofNullable(entitySpec.getAspectSpec(aspectName)) + .orElse( + entitySpec.getAspectSpecs().stream() + .filter(aspec -> aspec.getName().toLowerCase().equals(aspectName)) + .findFirst() + .get()); + } 
} diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index 796406ed746c9..d32f508a27d2c 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -20,8 +20,11 @@ import io.swagger.v3.oas.models.responses.ApiResponse; import io.swagger.v3.oas.models.responses.ApiResponses; import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -84,10 +87,10 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { addAspectSchemas(components, a); components.addSchemas( upperAspectName + ASPECT_REQUEST_SUFFIX, - buildAspectRefSchema(upperAspectName, false)); + buildAspectRefRequestSchema(upperAspectName)); components.addSchemas( upperAspectName + ASPECT_RESPONSE_SUFFIX, - buildAspectRefSchema(upperAspectName, true)); + buildAspectRefResponseSchema(upperAspectName)); }); // --> Entity components entityRegistry.getEntitySpecs().values().stream() @@ -318,7 +321,7 @@ private static PathItem buildListEntityPath(final EntitySpec entity) { .parameters( List.of( new Parameter() - .in(NAME_ASYNC) + .in(NAME_QUERY) .name("async") .description("Use async ingestion for high throughput.") .schema(new Schema().type(TYPE_BOOLEAN)._default(true)), @@ -409,7 +412,11 @@ private static Parameter buildParameterSchema( final Schema schema = new Schema() .type(TYPE_ARRAY) - .items(new Schema().type(TYPE_STRING)._enum(aspectNames)._default(aspectNames)); + .items( + new Schema() + .type(TYPE_STRING) + ._enum(aspectNames) + 
._default(aspectNames.stream().findFirst().orElse(null))); return new Parameter() .in(NAME_QUERY) .name("aspects") @@ -434,6 +441,16 @@ private static void addAspectSchemas(final Components components, final AspectSp final String newDefinition = definition.replaceAll("definitions", "components/schemas"); Schema s = Json.mapper().readValue(newDefinition, Schema.class); + // Set nullable attribute + Optional.ofNullable(s.getProperties()) + .orElse(new HashMap()) + .forEach( + (name, schema) -> + ((Schema) schema) + .setNullable( + !Optional.ofNullable(s.getRequired()) + .orElse(new ArrayList()) + .contains(name))); components.addSchemas(n, s); } catch (Exception e) { throw new RuntimeException(e); @@ -444,24 +461,25 @@ private static void addAspectSchemas(final Components components, final AspectSp } } - private static Schema buildAspectRefSchema( - final String aspectName, final boolean withSystemMetadata) { + private static Schema buildAspectRefResponseSchema(final String aspectName) { final Schema result = new Schema<>() .type(TYPE_OBJECT) .description(ASPECT_DESCRIPTION) .required(List.of(PROPERTY_VALUE)) .addProperty(PROPERTY_VALUE, new Schema<>().$ref(PATH_DEFINITIONS + aspectName)); - if (withSystemMetadata) { - result.addProperty( - "systemMetadata", - new Schema<>() - .$ref(PATH_DEFINITIONS + "SystemMetadata") - .description("System metadata for the aspect.")); - } + result.addProperty( + "systemMetadata", + new Schema<>() + .$ref(PATH_DEFINITIONS + "SystemMetadata") + .description("System metadata for the aspect.")); return result; } + private static Schema buildAspectRefRequestSchema(final String aspectName) { + return new Schema<>().$ref(PATH_DEFINITIONS + aspectName); + } + private static Schema buildEntitySchema( final EntitySpec entity, Set aspectNames, final boolean withSystemMetadata) { final Map properties = diff --git a/metadata-service/plugin/build.gradle b/metadata-service/plugin/build.gradle index 3f91b8f6ae6ba..f519eba4921d2 100644 --- 
a/metadata-service/plugin/build.gradle +++ b/metadata-service/plugin/build.gradle @@ -6,6 +6,8 @@ dependencies { implementation 'org.apache.commons:commons-lang3:3.12.0' implementation project(path: ':metadata-auth:auth-api') + implementation project(':entity-registry') + implementation externalDependency.springContext implementation externalDependency.guava implementation externalDependency.jacksonDataBind implementation externalDependency.jacksonDataFormatYaml diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/metadata/aspect/SpringPluginFactory.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/metadata/aspect/SpringPluginFactory.java new file mode 100644 index 0000000000000..8a080c8d9076e --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/metadata/aspect/SpringPluginFactory.java @@ -0,0 +1,132 @@ +package com.datahub.plugins.metadata.aspect; + +import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.PluginSpec; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; +import org.springframework.context.ApplicationContext; +import org.springframework.context.annotation.AnnotationConfigApplicationContext; + +@Slf4j +public class SpringPluginFactory extends PluginFactory { + + @Nullable private final ApplicationContext springApplicationContext; + + public SpringPluginFactory( + @Nullable ApplicationContext springApplicationContext, + @Nullable PluginConfiguration pluginConfiguration, + @Nonnull List classLoaders) { + super(pluginConfiguration, classLoaders); + + 
String[] packageScan = + extractPackageScan( + Optional.ofNullable(pluginConfiguration) + .map(PluginConfiguration::streamAll) + .orElse(Stream.of())) + .toArray(String[]::new); + + if (springApplicationContext != null || packageScan.length == 0) { + this.springApplicationContext = springApplicationContext; + } else { + AnnotationConfigApplicationContext rootContext = null; + + for (ClassLoader classLoader : classLoaders) { + AnnotationConfigApplicationContext applicationContext = + new AnnotationConfigApplicationContext(); + applicationContext.setId("custom-plugin"); + if (rootContext != null) { + applicationContext.setParent(rootContext); + } + applicationContext.setClassLoader(classLoader); + applicationContext.scan(packageScan); + rootContext = applicationContext; + } + rootContext.refresh(); + this.springApplicationContext = rootContext; + } + + loadPlugins(); + } + + private static Stream extractPackageScan(Stream configStream) { + return filterSpringConfigs(configStream) + .map(AspectPluginConfig::getPackageScan) + .filter(Objects::nonNull) + .flatMap(Collection::stream) + .distinct(); + } + + private static Stream filterSpringConfigs( + Stream configStream) { + return configStream.filter( + config -> config.getSpring() != null && config.getSpring().isEnabled()); + } + + /** + * Override to inject classes from Spring + * + * @param baseClazz + * @param configs + * @param packageNames + * @return + * @param + */ + @Override + protected List build( + Class baseClazz, List packageNames, List configs) { + + // load non-spring + List result = new ArrayList<>(super.build(baseClazz, packageNames, configs)); + + if (springApplicationContext == null) { + return result; + } + + // consider Spring dependency injection + for (AspectPluginConfig config : + filterSpringConfigs(configs.stream()).collect(Collectors.toSet())) { + boolean loaded = false; + + for (ClassLoader classLoader : getClassLoaders()) { + try { + Class clazz = 
classLoader.loadClass(config.getClassName()); + + final T plugin; + if (config.getSpring().getName() == null) { + plugin = (T) springApplicationContext.getBean(clazz); + } else { + plugin = (T) springApplicationContext.getBean(config.getSpring().getName(), clazz); + } + + if (plugin.enabled()) { + result.add((T) plugin.setConfig(config)); + } + + loaded = true; + break; + } catch (ClassNotFoundException e) { + log.warn( + "Failed to load class {} from loader {}", + config.getClassName(), + classLoader.getName()); + } + } + + if (!loaded) { + log.error("Failed to load Spring plugin {}!", config.getClassName()); + } + } + + return result; + } +} diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index 17a67bb70ff08..4116b8ad30b94 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -8,6 +8,7 @@ import static com.linkedin.metadata.authorization.ApiOperation.DELETE; import static com.linkedin.metadata.authorization.ApiOperation.EXISTS; import static com.linkedin.metadata.authorization.ApiOperation.READ; +import static com.linkedin.metadata.entity.validation.ValidationApiUtils.validateOrThrow; import static com.linkedin.metadata.entity.validation.ValidationUtils.*; import static com.linkedin.metadata.resources.restli.RestliConstants.*; import static com.linkedin.metadata.search.utils.SearchUtils.*; diff --git a/settings.gradle b/settings.gradle index f553bf97ec14b..a09e9a650803f 100644 --- a/settings.gradle +++ b/settings.gradle @@ -43,6 +43,7 @@ include 'metadata-testing:metadata-models-test-utils' include 'metadata-testing:metadata-test-utils' include 'entity-registry' include 'metadata-io' +include 
'metadata-io:metadata-io-api' include 'datahub-upgrade' include 'metadata-utils' include 'li-utils' @@ -53,6 +54,7 @@ include 'metadata-models-custom' include 'entity-registry:custom-test-model' include 'metadata-integration:java:spark-lineage' include 'metadata-integration:java:datahub-client' +include 'metadata-integration:java:custom-plugin-lib' include 'metadata-integration:java:datahub-event' include 'metadata-integration:java:datahub-protobuf' include 'metadata-integration:java:openlineage-converter' From 384c917b63581d59f70ca6dc9f278a9cbfb68e75 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Thu, 9 May 2024 13:57:44 -0700 Subject: [PATCH 07/15] docs(impact analysis): Add column level impact analysis graphql example (#10427) --- docs/api/tutorials/lineage.md | 59 ++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/docs/api/tutorials/lineage.md b/docs/api/tutorials/lineage.md index cf83538c41ac3..c30307098d613 100644 --- a/docs/api/tutorials/lineage.md +++ b/docs/api/tutorials/lineage.md @@ -137,7 +137,7 @@ You can now see the column-level lineage between datasets. Note that you have to

-## Read Lineage +## Read Table Lineage @@ -199,3 +199,60 @@ curl --location --request POST 'http://localhost:8080/api/graphql' \ This will perform a multi-hop lineage search on the urn specified. For more information about the `searchAcrossLineage` mutation, please refer to [searchAcrossLineage](https://datahubproject.io/docs/graphql/queries/#searchacrosslineage). + +## Read Column Lineage + + + + +```graphql +query searchAcrossLineage { + searchAcrossLineage( + input: { + query: "*" + urn: "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.adoption.human_profiles,PROD),profile_id)" + start: 0 + count: 10 + direction: DOWNSTREAM + orFilters: [ + { + and: [ + { + condition: EQUAL + negated: false + field: "degree" + values: ["1", "2", "3+"] + } + ] + } + ] + } + ) { + searchResults { + degree + entity { + urn + type + } + } + } +} +``` + +This example shows using lineage degrees as a filter, but additional search filters can be included here as well. + + + + +```shell +curl --location --request POST 'http://localhost:8080/api/graphql' \ +--header 'Authorization: Bearer ' \ +--header 'Content-Type: application/json' --data-raw '{ "query": "query searchAcrossLineage { searchAcrossLineage( input: { query: \"*\" urn: \"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.adoption.human_profiles,PROD),profile_id)\" start: 0 count: 10 direction: DOWNSTREAM orFilters: [ { and: [ { condition: EQUAL negated: false field: \"degree\" values: [\"1\", \"2\", \"3+\"] } ] } ] } ) { searchResults { degree entity { urn type } } }}" +}' +``` + + + + +This will perform a multi-hop lineage search on the urn specified. You can see schemaField URNs are made up of two parts: first the table they are a column of, and second the path of the column. For more information about the `searchAcrossLineage` query, please refer to [searchAcrossLineage](https://datahubproject.io/docs/graphql/queries/#searchacrosslineage).
+ From fe33ce7ce6e374c3e1faeec3469b6c658fdef54f Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 9 May 2024 16:22:28 -0500 Subject: [PATCH 08/15] fix(entity-registry): fix plugin load error (#10476) --- .../com/linkedin/metadata/aspect/plugins/PluginFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java index 7fa1454691df9..c812aea0c55d7 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java @@ -157,10 +157,10 @@ protected static List initPlugins( "Error constructing entity registry plugin class: {}", config.getClassName(), e); - return Stream.empty(); + return (T) null; } }) - .map(plugin -> (T) plugin) + .filter(Objects::nonNull) .filter(PluginSpec::enabled) .collect(Collectors.toList()); } From de0163481506a39231e86a11dba131099f4b98d4 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 9 May 2024 17:20:03 -0500 Subject: [PATCH 09/15] fix(openapi): fix lookupAspectSpec (#10478) --- .../openapi/v2/controller/EntityController.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index 6c0da39d40ce9..d844acb6f6f97 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -768,11 +768,11 @@ public 
List toEntityListResponse( * @return */ private static AspectSpec lookupAspectSpec(EntitySpec entitySpec, String aspectName) { - return Optional.ofNullable(entitySpec.getAspectSpec(aspectName)) - .orElse( - entitySpec.getAspectSpecs().stream() - .filter(aspec -> aspec.getName().toLowerCase().equals(aspectName)) - .findFirst() - .get()); + return entitySpec.getAspectSpec(aspectName) != null + ? entitySpec.getAspectSpec(aspectName) + : entitySpec.getAspectSpecs().stream() + .filter(aspec -> aspec.getName().toLowerCase().equals(aspectName)) + .findFirst() + .get(); } } From 8439cd7927642089dedf842b15a6a5dabdfa3eee Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Fri, 10 May 2024 12:00:43 -0500 Subject: [PATCH 10/15] fix(openapi-v3): comprehensive aspect name casing fix (#10484) --- .../v2/controller/EntityController.java | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index d844acb6f6f97..41cf972079c25 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -294,7 +294,9 @@ public ResponseEntity getAspect( .flatMap( e -> e.getAspects().entrySet().stream() - .filter(entry -> entry.getKey().equals(aspectName)) + .filter( + entry -> + entry.getKey().equals(lookupAspectSpec(urn, aspectName).getName())) .map(Map.Entry::getValue) .findFirst())); } @@ -324,7 +326,7 @@ public ResponseEntity headAspect( authentication, true); - return exists(opContext, urn, aspectName) + return exists(opContext, urn, lookupAspectSpec(urn, aspectName).getName()) ? 
ResponseEntity.noContent().build() : ResponseEntity.notFound().build(); } @@ -414,7 +416,8 @@ public void deleteAspect( authentication, true); - entityService.deleteAspect(opContext, entityUrn, aspectName, Map.of(), true); + entityService.deleteAspect( + opContext, entityUrn, lookupAspectSpec(urn, aspectName).getName(), Map.of(), true); } @Tag(name = "Generic Aspects") @@ -521,9 +524,9 @@ public ResponseEntity patchAspect( authentication, true); - RecordTemplate currentValue = entityService.getAspect(opContext, urn, aspectName, 0); - AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName); + RecordTemplate currentValue = entityService.getAspect(opContext, urn, aspectSpec.getName(), 0); + GenericPatchTemplate genericPatchTemplate = GenericPatchTemplate.builder() .genericJsonPatch(patch) @@ -560,7 +563,7 @@ public ResponseEntity patchAspect( .build( objectMapper, Map.of( - aspectName, + aspectSpec.getName(), Pair.of( result.getNewValue(), withSystemMetadata ? result.getNewSystemMetadata() : null))))); @@ -598,7 +601,11 @@ private List toRecordTemplates( Map> aspects = entityService.getLatestEnvelopedAspects( - opContext, urnsSet, resolveAspectNames(urnsSet, aspectNames)); + opContext, + urnsSet, + resolveAspectNames(urnsSet, aspectNames).stream() + .map(AspectSpec::getName) + .collect(Collectors.toSet())); return urns.stream() .map( @@ -612,18 +619,21 @@ private List toRecordTemplates( } } - private Set resolveAspectNames(Set urns, Set requestedNames) { - if (requestedNames.isEmpty()) { + private Set resolveAspectNames(Set urns, Set requestedAspectNames) { + if (requestedAspectNames.isEmpty()) { return urns.stream() .flatMap(u -> entityRegistry.getEntitySpec(u.getEntityType()).getAspectSpecs().stream()) - .map(AspectSpec::getName) .collect(Collectors.toSet()); } else { // ensure key is always present return Stream.concat( - requestedNames.stream(), urns.stream() - .map(u -> entityRegistry.getEntitySpec(u.getEntityType()).getKeyAspectName())) + .flatMap( + 
urn -> + requestedAspectNames.stream() + .map(aspectName -> lookupAspectSpec(urn, aspectName))), + urns.stream() + .map(u -> entityRegistry.getEntitySpec(u.getEntityType()).getKeyAspectSpec())) .collect(Collectors.toSet()); } } From 786fe457d028d3dfa28dbe0a521cf95f94ef092d Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Sun, 12 May 2024 19:31:53 -0400 Subject: [PATCH 11/15] feat(ingest/slack): Support profile ingestion using users:info (#10410) Co-authored-by: Shirshanka Das --- .../datahub/ingestion/source/slack/slack.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py b/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py index ef7301238e452..746f71fb0af37 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py +++ b/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py @@ -97,6 +97,7 @@ def __init__(self, ctx: PipelineContext, config: SlackSourceConfig): self.rate_limiter = RateLimiter( max_calls=self.config.api_requests_per_min, period=60 ) + self._use_users_info = False @classmethod def create(cls, config_dict, ctx): @@ -239,19 +240,31 @@ def get_public_channels(self) -> Iterable[MetadataWorkUnit]: break def populate_user_profile(self, user_obj: CorpUser) -> None: + if not user_obj.slack_id: + return try: # https://api.slack.com/methods/users.profile.get with self.rate_limiter: - user_profile_res = self.get_slack_client().users_profile_get( - user=user_obj.slack_id - ) + if self._use_users_info: + user_profile_res = self.get_slack_client().users_info( + user=user_obj.slack_id + ) + user_profile_res = user_profile_res.get("user", {}) + else: + user_profile_res = self.get_slack_client().users_profile_get( + user=user_obj.slack_id + ) + logger.debug(f"User profile: {user_profile_res}") user_profile = user_profile_res.get("profile", {}) user_obj.title = user_profile.get("title") user_obj.image_url = 
user_profile.get("image_192") user_obj.phone = user_profile.get("phone") except Exception as e: if "missing_scope" in str(e): - raise e + if self._use_users_info: + raise e + self._use_users_info = True + self.populate_user_profile(user_obj) return def populate_slack_id_from_email(self, user_obj: CorpUser) -> None: From 10cbbd76194019d808c18dcc8ac5719bdfb8fa2a Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Mon, 13 May 2024 08:39:33 +0900 Subject: [PATCH 12/15] docs: fix docs utms & slack footer (#10481) --- README.md | 4 ++-- docs/slack.md | 2 +- metadata-ingestion/docs/dev_guides/profiling_ingestions.md | 4 ---- metadata-ingestion/docs/dev_guides/sql_profiles.md | 3 --- metadata-ingestion/docs/sources/hana/hana.md | 3 --- metadata-ingestion/sink_docs/console.md | 4 ---- metadata-ingestion/sink_docs/file.md | 3 --- 7 files changed, 3 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 0db06d29eac78..15289f663f7a8 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ HOSTED_DOCS_ONLY--> [![PyPI version](https://badge.fury.io/py/acryl-datahub.svg)](https://badge.fury.io/py/acryl-datahub) [![build & test](https://github.com/datahub-project/datahub/workflows/build%20&%20test/badge.svg?branch=master&event=push)](https://github.com/datahub-project/datahub/actions?query=workflow%3A%22build+%26+test%22+branch%3Amaster+event%3Apush) [![Docker Pulls](https://img.shields.io/docker/pulls/acryldata/datahub-gms.svg)](https://hub.docker.com/r/acryldata/datahub-gms) -[![Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link) +[![Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://datahubproject.io/slack?utm_source=github&utm_medium=readme&utm_campaign=github_readme) [![PRs 
Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/datahub-project/datahub/blob/master/docs/CONTRIBUTING.md) [![GitHub commit activity](https://img.shields.io/github/commit-activity/m/datahub-project/datahub)](https://github.com/datahub-project/datahub/pulls?q=is%3Apr) [![License](https://img.shields.io/github/license/datahub-project/datahub)](https://github.com/datahub-project/datahub/blob/master/LICENSE) @@ -106,7 +106,7 @@ We welcome contributions from the community. Please refer to our [Contributing G ## Community -Join our [Slack workspace](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link) for discussions and important announcements. You can also find out more about our upcoming [town hall meetings](docs/townhalls.md) and view past recordings. +Join our [Slack workspace](https://datahubproject.io/slack?utm_source=github&utm_medium=readme&utm_campaign=github_readme) for discussions and important announcements. You can also find out more about our upcoming [town hall meetings](docs/townhalls.md) and view past recordings. ## Adoption diff --git a/docs/slack.md b/docs/slack.md index e1ef4593791e0..3e36b1f2ea3e3 100644 --- a/docs/slack.md +++ b/docs/slack.md @@ -2,7 +2,7 @@ The DataHub Slack is a thriving and rapidly growing community - we can't wait for you to join us! -_[Sign up here](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link) to join us on Slack and to subscribe to the DataHub Community newsletter. Already a member? [Log in here](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)._ +_[Sign up here](https://datahubproject.io/slack?utm_source=docs&utm_medium=page_link&utm_campaign=docs_page_link) to join us on Slack and to subscribe to the DataHub Community newsletter. Already a member? 
[Log in here](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)._ ## Slack Guidelines diff --git a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md index 0825a8d61a66b..41b80bd72f029 100644 --- a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md +++ b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md @@ -88,7 +88,3 @@ This will generate an interactive HTML file for analysis: `memray` has an extensive set of features for memory investigation. Take a look at their [documentation](https://bloomberg.github.io/memray/overview.html) to see the full feature set. - -## Questions - -If you've got any questions on configuring profiling, feel free to ping us on [our Slack](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)! diff --git a/metadata-ingestion/docs/dev_guides/sql_profiles.md b/metadata-ingestion/docs/dev_guides/sql_profiles.md index d211ef1f3f473..994d6fe489331 100644 --- a/metadata-ingestion/docs/dev_guides/sql_profiles.md +++ b/metadata-ingestion/docs/dev_guides/sql_profiles.md @@ -28,6 +28,3 @@ Extracts: SQL profiling is supported for all SQL sources. Check the individual source page to verify if it supports profiling. -## Questions - -If you've got any questions on configuring profiling, feel free to ping us on [our Slack](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)! diff --git a/metadata-ingestion/docs/sources/hana/hana.md b/metadata-ingestion/docs/sources/hana/hana.md index 34f7df2e17441..53fff0e67f348 100644 --- a/metadata-ingestion/docs/sources/hana/hana.md +++ b/metadata-ingestion/docs/sources/hana/hana.md @@ -5,6 +5,3 @@ The implementation uses the [SQLAlchemy Dialect for SAP HANA](https://github.com Under the hood, [SQLAlchemy Dialect for SAP HANA](https://github.com/SAP/sqlalchemy-hana) uses the SAP HANA Python Driver hdbcli. 
Therefore it is compatible with HANA or HANA express versions since HANA SPS 2. -## Questions - -If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)! diff --git a/metadata-ingestion/sink_docs/console.md b/metadata-ingestion/sink_docs/console.md index f24a3086c6153..539f1c5a56a22 100644 --- a/metadata-ingestion/sink_docs/console.md +++ b/metadata-ingestion/sink_docs/console.md @@ -27,7 +27,3 @@ sink: ## Config details None! - -## Questions - -If you've got any questions on configuring this sink, feel free to ping us on [our Slack](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)! diff --git a/metadata-ingestion/sink_docs/file.md b/metadata-ingestion/sink_docs/file.md index 7064a4dcfc8a5..2991afacbd93d 100644 --- a/metadata-ingestion/sink_docs/file.md +++ b/metadata-ingestion/sink_docs/file.md @@ -36,6 +36,3 @@ Note that a `.` is used to denote nested fields in the YAML recipe. | -------- | -------- | ------- | ------------------------- | | filename | ✅ | | Path to file to write to. | -## Questions - -If you've got any questions on configuring this sink, feel free to ping us on [our Slack](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)! 
From d217a6f885da4bba77508f826cfa5dd6f3ccd3f8 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Mon, 13 May 2024 06:07:02 -0700 Subject: [PATCH 13/15] feat(docs): Updating assertion docs + adding schema assertion doc (#10473) Co-authored-by: John Joyce --- docs-website/sidebars.js | 5 + .../observe/column-assertions.md | 149 +++------ .../observe/custom-sql-assertions.md | 158 +++------- .../observe/freshness-assertions.md | 149 +++------ .../observe/schema-assertions.md | 290 ++++++++++++++++++ .../observe/volume-assertions.md | 151 +++------ 6 files changed, 455 insertions(+), 447 deletions(-) create mode 100644 docs/managed-datahub/observe/schema-assertions.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index f26853a488b62..326bf804a3f92 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -97,6 +97,11 @@ module.exports = { id: "docs/managed-datahub/observe/column-assertions", className: "saasOnly", }, + { + type: "doc", + id: "docs/managed-datahub/observe/schema-assertions", + className: "saasOnly", + }, ], }, { diff --git a/docs/managed-datahub/observe/column-assertions.md b/docs/managed-datahub/observe/column-assertions.md index a0ca8abe26d51..3e5b903dc60ba 100644 --- a/docs/managed-datahub/observe/column-assertions.md +++ b/docs/managed-datahub/observe/column-assertions.md @@ -8,11 +8,8 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -> ⚠️ The **Column Assertions** feature is currently in private beta, part of the **Acryl Observe** module, and may only -> be available to a limited set of design partners. -> -> If you are interested in trying it and providing feedback, please reach out to your Acryl Customer Success -> representative. +> The **Column Assertions** feature is available as part of the **Acryl Observe** module of Acryl Cloud. +> If you are interested in learning more about **Acryl Observe** or trying it out, please [visit our website](https://www.acryldata.io/observe). 
## Introduction @@ -36,6 +33,7 @@ Column Assertions are currently supported for: 2. Redshift 3. BigQuery 4. Databricks +5. DataHub Dataset Profile Metrics (collected via ingestion) Note that an Ingestion Source _must_ be configured with the data platform of your choice in Acryl DataHub's **Ingestion** tab. @@ -118,10 +116,11 @@ another always-increasing number - that can be used to find the "new rows" that ### Prerequisites 1. **Permissions**: To create or delete Column Assertions for a specific entity on DataHub, you'll need to be granted the - `Edit Assertions` and `Edit Monitors` privileges for the entity. This is granted to Entity owners by default. + `Edit Assertions` and `Edit Monitors` privileges for the entity. This will be granted to Entity owners as part of the `Asset Owners - Metadata Policy` + by default. -2. **Data Platform Connection**: In order to create a Column Assertion, you'll need to have an **Ingestion Source** - configured to your Data Platform: Snowflake, BigQuery, Redshift, or Databricks under the **Ingestion** tab. +2. (Optional) **Data Platform Connection**: In order to create a Column Assertion that queries the data source directly (instead of DataHub metadata), you'll need to have an **Ingestion Source** + configured to your Data Platform: Snowflake, BigQuery, or Redshift under the **Ingestion** tab. Once these are in place, you're ready to create your Column Assertions! @@ -130,14 +129,14 @@ Once these are in place, you're ready to create your Column Assertions! 1. Navigate to the Table that you want to monitor 2. Click the **Validations** tab -

+

3. Click **+ Create Assertion** -

- +

+

4. Choose **Column** @@ -149,15 +148,15 @@ Once these are in place, you're ready to create your Column Assertions! **Column Value** assertions are used to monitor the value of a specific column in a table, and ensure that every row adheres to a specific condition. **Column Metric** assertions are used to compute a metric for that column, and then compare the value of that metric to your expectations. -

- +

+

7. Configure the **column selection**. This defines the column that should be monitored by the Column Assertion. You can choose from any of the columns from the table listed in the dropdown. -

- +

+

8. Configure the **evaluation criteria**. This step varies based on the type of assertion you chose in the previous step. @@ -186,7 +185,7 @@ Once these are in place, you're ready to create your Column Assertions! have changed. A **High Watermark Column** is a column that contains a constantly-incrementing value - a date, a time, or another always-increasing number. When selected, a query will be issued to the table find only the rows which have changed since the last assertion run. -

+

@@ -206,11 +205,10 @@ Once these are in place, you're ready to create your Column Assertions! assertion. This is useful if you want to limit the assertion to a subset of rows in the table. Note this option will not be available if you choose **DataHub Dataset Profile** as the **source**. -11. Click **Next** -12. Configure actions that should be taken when the Column Assertion passes or fails +11. Configure actions that should be taken when the Column Assertion passes or fails

- +

- **Raise incident**: Automatically raise a new DataHub `Column` Incident for the Table whenever the Column Assertion is failing. This @@ -219,48 +217,40 @@ Once these are in place, you're ready to create your Column Assertions! - **Resolve incident**: Automatically resolved any incidents that were raised due to failures in this Column Assertion. Note that any other incidents will not be impacted. -10. Click **Save**. +12. Click **Next** and then **Save**. And that's it! DataHub will now begin to monitor your Column Assertion for the table. -To view the time of the next Column Assertion evaluation, simply click **Column** and then click on your -new Assertion: - -

- -

- Once your assertion has run, you will begin to see Success or Failure status for the Table -

- +

+

## Stopping a Column Assertion -In order to temporarily stop the evaluation of a Column Assertion: +In order to temporarily stop the evaluation of the assertion: -1. Navigate to the **Validations** tab of the table with the assertion -2. Click **Column** to open the Column Assertions list -3. Click the three-dot menu on the right side of the assertion you want to disable -4. Click **Stop** +1. Navigate to the **Validations** tab of the Table with the assertion +2. Click **Column** to open the Column Assertions list +3. Click the "Stop" button for the assertion you wish to pause.

- +

-To resume the Column Assertion, simply click **Turn On**. +To resume the assertion, simply click **Start**. -

- +

+

+ ## Creating Column Assertions via API -Under the hood, Acryl DataHub implements Column Assertion Monitoring using two "entity" concepts: +Under the hood, Acryl DataHub implements Column Assertion Monitoring using two concepts: - **Assertion**: The specific expectation for the column metric. e.g. "The value of an integer column is greater than 10 for all rows in the table." This is the "what". - - **Monitor**: The process responsible for evaluating the Assertion on a given evaluation schedule and using specific mechanisms. This is the "how". @@ -269,80 +259,13 @@ Note that to create or delete Assertions and Monitors for a specific entity on D #### GraphQL -In order to create a Column Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use 2 -GraphQL mutation queries to create a Column Assertion entity and create an Assertion Monitor entity responsible for evaluating it. - -Start by creating the Column Assertion entity using the `createFieldAssertion` query and hang on to the 'urn' field of the Assertion entity -you get back. Then continue by creating a Monitor entity using the `createAssertionMonitor`. +In order to create or update a Column Assertion, you can use the `upsertDatasetFieldAssertionMonitor` mutation.
##### Examples -To create a Column Assertion Entity that checks that the value of an integer column is greater than 10: - -```json -mutation createFieldAssertion { - createFieldAssertion( - input: { - entityUrn: "", - type: FIELD_VALUES, - fieldValuesAssertion: { - field: { - path: "", - type: "NUMBER", - nativeType: "NUMBER(38,0)" - }, - operator: GREATER_THAN, - parameters: { - value: { - type: NUMBER, - value: "10" - } - }, - failThreshold: { - type: COUNT, - value: 0 - }, - excludeNulls: true - } - } - ) { - urn -} -} -``` - -To create an Assertion Monitor Entity that evaluates the column assertion every 8 hours using all rows in the table: +Creating a Field Values Column Assertion that runs every 8 hours: -```json -mutation createAssertionMonitor { - createAssertionMonitor( - input: { - entityUrn: "", - assertionUrn: "", - schedule: { - cron: "0 */8 * * *", - timezone: "America/Los_Angeles" - }, - parameters: { - type: DATASET_FIELD, - datasetFieldParameters: { - sourceType: ALL_ROWS_QUERY - } - } - } - ) { - urn - } -} -``` - -This entity defines _when_ to run the check (Using CRON format - every 8th hour) and _how_ to run the check (using a query against all rows of the table). - -After creating the monitor, the new assertion will start to be evaluated every 8 hours in your selected timezone. - -Alternatively you can use `upsertDatasetFieldAssertionMonitor` graphql endpoint for creating a Column Assertion and corresponding Monitor for a dataset. - -```json +```graphql mutation upsertDatasetFieldAssertionMonitor { upsertDatasetFieldAssertionMonitor( input: { @@ -376,7 +299,7 @@ mutation upsertDatasetFieldAssertionMonitor { } mode: ACTIVE } - ){ + ) { urn } } @@ -384,7 +307,7 @@ mutation upsertDatasetFieldAssertionMonitor { You can use same endpoint with assertion urn input to update an existing Column Assertion and corresponding Monitor. 
-```json +```graphql mutation upsertDatasetFieldAssertionMonitor { upsertDatasetFieldAssertionMonitor( assertionUrn: "" @@ -419,7 +342,7 @@ mutation upsertDatasetFieldAssertionMonitor { } mode: ACTIVE } - ){ + ) { urn } } diff --git a/docs/managed-datahub/observe/custom-sql-assertions.md b/docs/managed-datahub/observe/custom-sql-assertions.md index adf1c8bd44c8b..ce6e46cfda1fd 100644 --- a/docs/managed-datahub/observe/custom-sql-assertions.md +++ b/docs/managed-datahub/observe/custom-sql-assertions.md @@ -8,12 +8,8 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; - -> ⚠️ The **Custom SQL Assertions** feature is currently in private beta, part of the **Acryl Observe** module, and may only be available to a -> limited set of design partners. -> -> If you are interested in trying it and providing feedback, please reach out to your Acryl Customer Success -> representative. +> The **Custom SQL Assertions** feature is available as part of the **Acryl Observe** module of Acryl Cloud. +> If you are interested in learning more about **Acryl Observe** or trying it out, please [visit our website](https://www.acryldata.io/observe). ## Introduction @@ -72,8 +68,7 @@ At the most basic level, **Custom SQL Assertions** consist of a few important pa 1. An **Evaluation Schedule** 2. A **Query** -3. An **Condition Type** -4. An **Assertion Description** +3. A **Condition Type** In this section, we'll give an overview of each. @@ -96,6 +91,7 @@ Use the "Try it out" button to test your query and ensure that it returns a sing #### 3. Condition Type The **Condition Type**: This defines the conditions under which the Assertion will **fail**. 
The list of supported operations is: + - **Is Equal To**: The assertion will fail if the query result is equal to the configured value - **Is Not Equal To**: The assertion will fail if the query result is not equal to the configured value - **Is Greater Than**: The assertion will fail if the query result is greater than the configured value @@ -107,17 +103,14 @@ The **Condition Type**: This defines the conditions under which the Assertion wi Custom SQL Assertions also have an off switch: they can be started or stopped at any time with the click of button. -#### 4. Assertion Description - -The **Assertion Description**: This is a human-readable description of the Assertion. It should be used to describe the meaning of the Assertion, and can be used to provide additional context to users who are viewing the Assertion. - ## Creating a Custom SQL Assertion ### Prerequisites 1. **Permissions**: To create or delete Custom SQL Assertions for a specific entity on DataHub, you'll need to be granted the - `Edit Assertions`, `Edit Monitors`, **and the additional `Edit SQL Assertion Monitors`** privileges for the entity. This is granted to Entity owners by default. + `Edit Assertions`, `Edit Monitors`, **and the additional `Edit SQL Assertion Monitors`** privileges for the entity. This will be granted to Entity owners as part of the `Asset Owners - Metadata Policy` + by default. 2. **Data Platform Connection**: In order to create a Custom SQL Assertion, you'll need to have an **Ingestion Source** configured to your Data Platform: Snowflake, BigQuery, Redshift, or Databricks under the **Integrations** tab. @@ -129,14 +122,14 @@ Once these are in place, you're ready to create your Custom SQL Assertions! 1. Navigate to the Table you want to monitor 2. Click the **Validations** tab -

- +

+

3. Click **+ Create Assertion** -

- +

+

4. Choose **Custom** @@ -146,82 +139,70 @@ Once these are in place, you're ready to create your Custom SQL Assertions! 6. Provide a SQL **query** that will be used to evaluate the Table. The query should return a single row with a single column. Currently only numeric values are supported (integer and floats). The query can be as simple or as complex as you'd like, and can use any SQL features supported by your Data Warehouse. Make sure to use the fully qualified name of the Table in your query. -

- +

+

7. Configure the evaluation **condition type**. This determines the cases in which the new assertion will fail when it is evaluated. -

- -

- -8. Add a **description** for the assertion. This is a human-readable description of the Assertion. It should be used to describe the meaning of the Assertion, and can be used to provide additional context to users who are viewing the Assertion. - -

- -

- -9. (Optional) Use the **Try it out** button to test your query and ensure that it returns a single row with a single column, and passes the configured condition type. - -

- +

+

-10. Click **Next** -11. Configure actions that should be taken when the Custom SQL Assertion passes or fails +8. Configure actions that should be taken when the Custom SQL Assertion passes or fails

- +

- **Raise incident**: Automatically raise a new DataHub Incident for the Table whenever the Custom SQL Assertion is failing. This may indicate that the Table is unfit for consumption. Configure Slack Notifications under **Settings** to be notified when an incident is created due to an Assertion failure. + - **Resolve incident**: Automatically resolved any incidents that were raised due to failures in this Custom SQL Assertion. Note that any other incidents will not be impacted. -1. Click **Save**. -And that's it! DataHub will now begin to monitor your Custom SQL Assertion for the table. - -To view the time of the next Custom SQL Assertion evaluation, simply click **Custom** and then click on your -new Assertion: +9. (Optional) Use the **Try it out** button to test your query and ensure that it returns a single row with a single column, and passes the configured condition type. -

- +

+

+10. Click **Next** and then add a description. + +11. Click **Save** + +And that's it! DataHub will now begin to monitor your Custom SQL Assertion for the table. + Once your assertion has run, you will begin to see Success or Failure status for the Table -

- +

+

## Stopping a Custom SQL Assertion -In order to temporarily stop the evaluation of a Custom SQL Assertion: +In order to temporarily stop the evaluation of the assertion: 1. Navigate to the **Validations** tab of the Table with the assertion -2. Click **Custom** to open the Custom SQL Assertions list -3. Click the three-dot menu on the right side of the assertion you want to disable -4. Click **Stop** +2. Click **Custom SQL** to open the list of Custom SQL Assertions +3. Click the "Stop" button for the assertion you wish to pause. 

- +

-To resume the Custom SQL Assertion, simply click **Turn On**. +To resume the assertion, simply click **Start**. -

- +

+

- ## Creating Custom SQL Assertions via API -Under the hood, Acryl DataHub implements Custom SQL Assertion Monitoring using two "entity" concepts: +Under the hood, Acryl DataHub implements Custom SQL Assertion Monitoring using two concepts: - **Assertion**: The specific expectation for the custom assertion, e.g. "The table was changed in the past 7 hours" or "The table is changed on a schedule of every day by 8am". This is the "what". @@ -234,72 +215,13 @@ Note that to create or delete Assertions and Monitors for a specific entity on D #### GraphQL -In order to create a Custom SQL Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use 2 -GraphQL mutation queries to create a Custom SQL Assertion entity and create an Assertion Monitor entity responsible for evaluating it. - -Start by creating the Custom SQL Assertion entity using the `createSqlAssertion` query and hang on to the 'urn' field of the Assertion entity -you get back. Then continue by creating a Monitor entity using the `createAssertionMonitor`. +In order to create or update a Custom SQL Assertion, you can use the `upsertDatasetSqlAssertionMonitor` mutation. ##### Examples -To create a Custom SQL Assertion Entity that checks whether a query result is greater than 100: - -```json -mutation createSqlAssertion { - createSqlAssertion( - input: { - entityUrn: "", - type: METRIC, - description: "", - statement: "", - operator: GREATER_THAN, - parameters: { - value: { - value: "100", - type: NUMBER - } - } - } - ) { - urn - } -} -``` - -The supported assertion types are `METRIC` and `METRIC_CHANGE`. If you choose `METRIC_CHANGE`, -you will need to provide a `changeType` parameter with either `ABSOLUTE` or `PERCENTAGE` values. -The supported operator types are `EQUAL_TO`, `NOT_EQUAL_TO`, `GREATER_THAN`, `GREATER_THAN_OR_EQUAL_TO`, `LESS_THAN`, `LESS_THAN_OR_EQUAL_TO`, and `BETWEEN` (requires minValue, maxValue). -The supported parameter types are `NUMBER`. 
- -To create an Assertion Monitor Entity that evaluates the custom assertion every 8 hours: - -```json -mutation createAssertionMonitor { - createAssertionMonitor( - input: { - entityUrn: "", - assertionUrn: "", - schedule: { - cron: "0 */8 * * *", - timezone: "America/Los_Angeles" - }, - parameters: { - type: DATASET_SQL - } - } - ) { - urn - } -} -``` - -This entity defines _when_ to run the check (Using CRON format - every 8th hour) and _how_ to run the check (using the Information Schema). - -After creating the monitor, the new assertion will start to be evaluated every 8 hours in your selected timezone. - -Alternatively you can use `upsertDatasetSqlAssertionMonitor` graphql endpoint for creating a Custom SQL Assertion and corresponding Monitor for a dataset. +To create a Custom SQL Assertion Entity that checks whether a query result is greater than 100 that runs every 8 hours: -```json +```graphql mutation upsertDatasetSqlAssertionMonitor { upsertDatasetSqlAssertionMonitor( input: { @@ -328,7 +250,7 @@ mutation upsertDatasetSqlAssertionMonitor { You can use same endpoint with assertion urn input to update an existing Custom SQL Assertion and corresponding Monitor. -```json +```graphql mutation upsertDatasetSqlAssertionMonitor { upsertDatasetSqlAssertionMonitor( assertionUrn: "" diff --git a/docs/managed-datahub/observe/freshness-assertions.md b/docs/managed-datahub/observe/freshness-assertions.md index 14ff828dc7376..5e80c9dd940dc 100644 --- a/docs/managed-datahub/observe/freshness-assertions.md +++ b/docs/managed-datahub/observe/freshness-assertions.md @@ -8,12 +8,8 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; - -> ⚠️ The **Freshness Assertions** feature is currently in private beta, part of the **Acryl Observe** module, and may only be available to a -> limited set of design partners. -> -> If you are interested in trying it and providing feedback, please reach out to your Acryl Customer Success -> representative. 
+> The **Freshness Assertions** feature is available as part of the **Acryl Observe** module of Acryl Cloud. +> If you are interested in learning more about **Acryl Observe** or trying it out, please [visit our website](https://www.acryldata.io/observe). ## Introduction @@ -44,6 +40,7 @@ Freshness Assertions are currently supported for: 2. Redshift 3. BigQuery 4. Databricks +5. DataHub Operations (collected via ingestion) Note that an Ingestion Source _must_ be configured with the data platform of your choice in Acryl DataHub's **Ingestion** tab. @@ -144,10 +141,12 @@ Freshness Assertions also have an off switch: they can be started or stopped at ### Prerequisites 1. **Permissions**: To create or delete Freshness Assertions for a specific entity on DataHub, you'll need to be granted the -`Edit Assertions` and `Edit Monitors` privileges for the entity. This is granted to Entity owners by default. +`Edit Assertions` and `Edit Monitors` privileges for the entity. This will be granted to Entity owners as part of the `Asset Owners - Metadata Policy` +by default. -2. **Data Platform Connection**: In order to create a Freshness Assertion, you'll need to have an **Ingestion Source** configured to your -Data Platform: Snowflake, BigQuery, Redshift, or Databricks under the **Integrations** tab. +2. (Optional) **Data Platform Connection**: In order to create a Freshness Assertion that queries the source data platform directly (instead of DataHub metadata), you'll need to have an **Ingestion Source** configured to your +Data Platform: Snowflake, BigQuery, or Redshift under the **Integrations** tab. + Once these are in place, you're ready to create your Freshness Assertions! @@ -156,14 +155,14 @@ Once these are in place, you're ready to create your Freshness Assertions! 1. Navigate to the Table that to monitor for freshness 2. Click the **Validations** tab -

- +

+

3. Click **+ Create Assertion** -

- +

+

4. Choose **Freshness** @@ -176,22 +175,22 @@ or _In the past X hours_ to configure a fixed interval that is used when checkin _Check whether the table has changed between subsequent evaluations of the check_ -

- +

+

_Check whether the table has changed in a specific window of time_ -

- +

+

7. (Optional) Click **Advanced** to customize the evaluation **source**. This is the mechanism that will be used to evaluate the check. Each Data Platform supports different options including Audit Log, Information Schema, Last Modified Column, High Watermark Column, and DataHub Operation. -

- +

+

- **Audit Log**: Check the Data Platform operational audit log to determine whether the table changed within the evaluation period. @@ -203,54 +202,48 @@ the check. Each Data Platform supports different options including Audit Log, In when using a fixed lookback period. - **DataHub Operation**: Use DataHub Operations to determine whether the table changed within the evaluation period. -1. Click **Next** -2. Configure actions that should be taken when the Freshness Assertion passes or fails +8. Configure actions that should be taken when the Freshness Assertion passes or fails

- +

- **Raise incident**: Automatically raise a new DataHub `Freshness` Incident for the Table whenever the Freshness Assertion is failing. This may indicate that the Table is unfit for consumption. Configure Slack Notifications under **Settings** to be notified when - an incident is created due to an Assertion failure. + an incident is created due to an Assertion failure. + - **Resolve incident**: Automatically resolved any incidents that were raised due to failures in this Freshness Assertion. Note that - any other incidents will not be impacted. + any other incidents will not be impacted. + +9. Click **Next** and add a description. 10. Click **Save**. And that's it! DataHub will now begin to monitor your Freshness Assertion for the table. -To view the time of the next Freshness Assertion evaluation, simply click **Freshness** and then click on your -new Assertion: - -

- -

- Once your assertion has run, you will begin to see Success or Failure status for the Table -

- +

+

## Stopping a Freshness Assertion -In order to temporarily stop the evaluation of a Freshness Assertion: +In order to temporarily stop the evaluation of the assertion: 1. Navigate to the **Validations** tab of the Table with the assertion -2. Click **Freshness** to open the Freshness Assertions list -3. Click the three-dot menu on the right side of the assertion you want to disable -4. Click **Stop** +2. Click **Freshness** to open the list of Freshness Assertions +3. Click the "Stop" button for the assertion you wish to pause. 

- +

-To resume the Freshness Assertion, simply click **Turn On**. +To resume the assertion, simply click **Start**. -

- +

+

@@ -263,7 +256,7 @@ requiring any manual setup. If Acryl DataHub is able to detect a pattern in the change frequency of a Snowflake, Redshift, BigQuery, or Databricks Table, you'll find a recommended Smart Assertion under the `Validations` tab on the Table profile page: -

+

@@ -275,7 +268,7 @@ Don't need it anymore? Smart Assertions can just as easily be turned off by clic ## Creating Freshness Assertions via API -Under the hood, Acryl DataHub implements Freshness Assertion Monitoring using two "entity" concepts: +Under the hood, Acryl DataHub implements Freshness Assertion Monitoring using two concepts: - **Assertion**: The specific expectation for freshness, e.g. "The table was changed int the past 7 hours" or "The table is changed on a schedule of every day by 8am". This is the "what". @@ -288,67 +281,13 @@ Note that to create or delete Assertions and Monitors for a specific entity on D #### GraphQL -In order to create a Freshness Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use 2 -GraphQL mutation queries to create a Freshness Assertion entity and create an Assertion Monitor entity responsible for evaluating it. - -Start by creating the Freshness Assertion entity using the `createFreshnessAssertion` query and hang on to the 'urn' field of the Assertion entity -you get back. Then continue by creating a Monitor entity using the `createAssertionMonitor`. +In order to create or update a Freshness Assertion, you can use the `upsertDatasetFreshnessAssertionMonitor` mutation. ##### Examples -To create a Freshness Assertion Entity that checks whether a table has been updated in the past 8 hours: - -```json -mutation createFreshnessAssertion { - createFreshnessAssertion( - input: { - entityUrn: "", - type: DATASET_CHANGE, - schedule: { - type: FIXED_INTERVAL, - fixedInterval: { unit: HOUR, multiple: 8 } - } - } - ) { - urn - } -} -``` - -This defines the user's expectation: that the table should have changed in the past 8 hours whenever the assertion is evaluated. 
+To create a Freshness Assertion Entity that checks whether a table has been updated in the past 8 hours, and runs every 8 hours: -To create an Assertion Monitor Entity that evaluates the assertion every 8 hours using the Audit Log: - -```json -mutation createAssertionMonitor { - createAssertionMonitor( - input: { - entityUrn: "", - assertionUrn: "", - schedule: { - cron: "0 */8 * * *", - timezone: "America/Los_Angeles" - }, - parameters: { - type: DATASET_FRESHNESS, - datasetFreshnessParameters: { - sourceType: AUDIT_LOG, - } - } - } - ) { - urn - } -} -``` - -This entity defines _when_ to run the check (Using CRON format - every 8th hour) and _how_ to run the check (using the Audit Log). - -After creating the monitor, the new assertion will start to be evaluated every 8 hours in your selected timezone. - -Alternatively you can use `upsertDatasetFreshnessAssertionMonitor` graphql endpoint for creating a Freshness Assertion and corresponding Monitor for a dataset. - -```json +```graphql mutation upsertDatasetFreshnessAssertionMonitor { upsertDatasetFreshnessAssertionMonitor( input: { @@ -366,15 +305,15 @@ mutation upsertDatasetFreshnessAssertionMonitor { } mode: ACTIVE } - ){ + ) { urn } } ``` -You can use same endpoint with assertion urn input to update an existing Freshness Assertion and corresponding Monitor. +You can use the same endpoint with assertion urn input to update an existing Freshness Assertion and corresponding Monitor: -```json +```graphql mutation upsertDatasetFreshnessAssertionMonitor { upsertDatasetFreshnessAssertionMonitor( assertionUrn: "" @@ -393,7 +332,7 @@ mutation upsertDatasetFreshnessAssertionMonitor { } mode: ACTIVE } - ){ + ) { urn } } @@ -408,7 +347,7 @@ to capture changes, or where the data platform's mechanism is not reliable. 
In o ##### Examples -```json +```graphql mutation reportOperation { reportOperation( input: { diff --git a/docs/managed-datahub/observe/schema-assertions.md b/docs/managed-datahub/observe/schema-assertions.md new file mode 100644 index 0000000000000..b7869218932d8 --- /dev/null +++ b/docs/managed-datahub/observe/schema-assertions.md @@ -0,0 +1,290 @@ +--- +description: This page provides an overview of working with DataHub Schema Assertions +--- +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + + +# Schema Assertions + + + +> The **Schema Assertions** feature is available as part of the **Acryl Observe** module of Acryl Cloud. +> If you are interested in learning more about **Acryl Observe** or trying it out, please [visit our website](https://www.acryldata.io/observe). + +## Introduction + +Can you remember a time when columns were unexpectedly added, removed, or altered for a key Table in your Data Warehouse? +Perhaps this caused downstream tables, views, dashboards, data pipelines, or AI models to break. + +There are many reasons why the structure of an important Table on Snowflake, Redshift, or BigQuery may schema change, breaking the expectations +of downstream consumers of the table. + +What if you could reduce the time to detect these incidents, so that the people responsible for the data were made aware of data +issues _before_ anyone else? With Acryl DataHub **Schema Assertions**, you can. + +Acryl DataHub allows users to define expectations about a table's columns and their data types, and will monitor and validate these expectations over +time, notifying you when a breaking change occurs. + +In this article, we'll cover the basics of monitoring Schema Assertions - what they are, how to configure them, and more - so that you and your team can +start building trust in your most important data assets. + +Let's get started! 
+ +## Support + +Schema Assertions are currently supported for all data sources that provide a schema via the normal ingestion process. + +## What is a Schema Assertion? + +A **Schema Assertion** is a Data Quality rule used to monitor the columns in a particular table and their data types. +They allow you to define a set of "required" columns for the table along with their expected types, and then be notified +if anything changes via a failing assertion. + +This type of assertion can be particularly useful if you want to monitor the structure of a table which is outside of your +direct control, for example the result of an ETL process from an upstream application or tables provided by a 3rd party data vendor. It +allows you to get ahead of potentially breaking schema changes, by alerting you as soon as they occur, and before +they have a chance to negatively impact downstream assets. + +### Anatomy of a Schema Assertion + +At the most basic level, **Schema Assertions** consist of a few important parts: + +1. A **Condition Type** +2. A set of **Expected Columns** + +In this section, we'll give an overview of each. + +#### 1. Condition Type + +The **Condition Type** defines the conditions under which the Assertion will **fail**. More concretely, it determines +how the _expected_ columns should be compared to the _actual_ columns found in the schema to determine a passing or failing +state for the data quality check. + +The list of supported condition types: + +- **Contains**: The assertion will fail if the actual schema does not contain all expected columns and their types. +- **Exact Match**: The assertion will fail if the actual schema does not EXACTLY match the expected columns and their types. No + additional columns will be permitted. + +Schema Assertions will be evaluated whenever a change in the schema of the underlying table is detected. 
+They also have an off switch: they can be started or stopped at any time by pressing the start (play) or stop (pause) buttons. + + +#### 2. Expected Columns + +The **Expected Columns** are a set of column **names** along with their high-level **data +types** that should be used to compare against the _actual_ columns found in the table. By default, the expected column +set will be derived from the current set of columns found in the table. This conveniently allows you to "freeze" or "lock" +the current schema of a table in just a few clicks. + +Each "expected column" is composed of: + +1. **Name**: The name of the column that should be present in the table. Nested columns are supported in a flattened + fashion by simply providing a dot-separated path to the nested column. For example, `user.id` would be a nested column `id`. + In the case of a complex array or map, each field in the elements of the array or map will be treated as dot-delimited columns. + Note that verifying the specific type of object in primitive arrays or maps is not currently supported. Note that the comparison performed + is currently not case-sensitive. + +2. **Type**: The high-level data type of the column in the table. This type is intentionally "high level" to allow for normal column widening practices + without the risk of failing the assertion unnecessarily. For example a `varchar(64)` and a `varchar(256)` will both resolve to the same high-level + "STRING" type. The currently supported set of data types includes the following: + + - String + - Number + - Boolean + - Date + - Timestamp + - Struct + - Array + - Map + - Union + - Bytes + - Enum + +## Creating a Schema Assertion + +### Prerequisites + +- **Permissions**: To create or delete Schema Assertions for a specific entity on DataHub, you'll need to be granted the + `Edit Assertions` and `Edit Monitors` privileges for the entity. This will be granted to Entity owners as part of the `Asset Owners - Metadata Policy` + by default. 
+ +Once these are in place, you're ready to create your Schema Assertions! + +### Steps + +1. Navigate to the Table you want to monitor +2. Click the **Validations** tab + +

+ +

+ +3. Click **+ Create Assertion** + +

+ +

+ +4. Choose **Schema** + +5. Select the **condition type**. + +6. Define the **expected columns** that will be continually compared against the actual column set. This defaults to the current columns for the table. + +

+ +

+ +7. Configure actions that should be taken when the assertion passes or fails + +

+ +

+ +- **Raise incident**: Automatically raise a new DataHub Incident for the Table whenever the Schema Assertion is failing. This + may indicate that the Table is unfit for consumption. Configure Slack Notifications under **Settings** to be notified when + an incident is created due to an Assertion failure. + +- **Resolve incident**: Automatically resolve any incidents that were raised due to failures in this Schema Assertion. Note that + any other incidents will not be impacted. + +Then click **Next**. + +7. (Optional) Add a **description** for the assertion. This is a human-readable description of the assertion. If you do not provide one, a description will be generated for you. + +

+ +

+ +8. Click **Save**. + +And that's it! DataHub will now begin to monitor your Schema Assertion for the table. + +Once your assertion has run, you will begin to see Success or Failure status: + +

+ +

+ + +## Stopping a Schema Assertion + +In order to temporarily stop the evaluation of the assertion: + +1. Navigate to the **Validations** tab of the Table with the assertion +2. Click **Schema** to open the Schema Assertion +3. Click the "Stop" button. + +

+ +

+ +To resume the assertion, simply click **Start**. + +

+ +

+ + +## Creating Schema Assertions via API + +Note that to create or delete Assertions and Monitors for a specific entity on DataHub, you'll need the +`Edit Assertions` and `Edit Monitors` privileges to create a schema assertion via API. + +#### GraphQL + +In order to create a Schema Assertion, you can use the `upsertDatasetSchemaAssertionMonitor` mutation. + +##### Examples + +To create a Schema Assertion that checks for the presence of a specific set of columns: + +```graphql +mutation upsertDatasetSchemaAssertionMonitor { + upsertDatasetSchemaAssertionMonitor( + input: { + entityUrn: "", + assertion: { + compatibility: SUPERSET, # How the actual columns will be compared against the expected fields (provided next) + fields: [ + { + path: "id", + type: STRING + }, + { + path: "count", + type: NUMBER + }, + { + path: "struct", + type: STRUCT + }, + { + path: "struct.nestedBooleanField", + type: BOOLEAN + } + ] + }, + description: "", + mode: ACTIVE + } + ) +} +``` + +The supported compatibility types are `EXACT_MATCH` and `SUPERSET` (Contains). + +You can use the same endpoint with assertion urn input to update an existing Schema Assertion; simply add the `assertionUrn` field: + +```graphql +mutation upsertDatasetSchemaAssertionMonitor { + upsertDatasetSchemaAssertionMonitor( + assertionUrn: "urn:li:assertion:existing-assertion-id", + input: { + entityUrn: "", + assertion: { + compatibility: EXACT_MATCH, + fields: [ + { + path: "id", + type: STRING + }, + { + path: "count", + type: NUMBER + }, + { + path: "struct", + type: STRUCT + }, + { + path: "struct.nestedBooleanField", + type: BOOLEAN + } + ] + }, + description: "", + mode: ACTIVE + } + ) +} +``` + +You can delete assertions along with their monitors using GraphQL mutations: `deleteAssertion` and `deleteMonitor`. + +### Tips + +:::info +**Authorization** + +Remember to always provide a DataHub Personal Access Token when calling the GraphQL API. 
To do so, just add the 'Authorization' header as follows: + +``` +Authorization: Bearer +``` + +**Exploring GraphQL API** + +Also, remember that you can play with an interactive version of the Acryl GraphQL API at `https://your-account-id.acryl.io/api/graphiql` +::: diff --git a/docs/managed-datahub/observe/volume-assertions.md b/docs/managed-datahub/observe/volume-assertions.md index 2a98b37576c41..7d801933834ab 100644 --- a/docs/managed-datahub/observe/volume-assertions.md +++ b/docs/managed-datahub/observe/volume-assertions.md @@ -8,12 +8,8 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; - -> ⚠️ The **Volume Assertions** feature is currently in private beta, part of the **Acryl Observe** module, and may only be available to a -> limited set of design partners. -> -> If you are interested in trying it and providing feedback, please reach out to your Acryl Customer Success -> representative. +> The **Volume Assertions** feature is available as part of the **Acryl Observe** module of Acryl Cloud. +> If you are interested in learning more about **Acryl Observe** or trying it out, please [visit our website](https://www.acryldata.io/observe). ## Introduction @@ -45,6 +41,7 @@ Volume Assertions are currently supported for: 2. Redshift 3. BigQuery 4. Databricks +5. DataHub Dataset Profile (collected via ingestion) Note that an Ingestion Source _must_ be configured with the data platform of your choice in Acryl DataHub's **Ingestion** tab. @@ -137,10 +134,11 @@ Volume Assertions also have an off switch: they can be started or stopped at any ### Prerequisites 1. **Permissions**: To create or delete Volume Assertions for a specific entity on DataHub, you'll need to be granted the - `Edit Assertions` and `Edit Monitors` privileges for the entity. This is granted to Entity owners by default. + `Edit Assertions` and `Edit Monitors` privileges for the entity. 
This will be granted to Entity owners as part of the `Asset Owners - Metadata Policy` + by default. -2. **Data Platform Connection**: In order to create a Volume Assertion, you'll need to have an **Ingestion Source** configured to your - Data Platform: Snowflake, BigQuery, Redshift, or Databricks under the **Integrations** tab. +2. (Optional) **Data Platform Connection**: In order to create a Volume Assertion that queries the source data platform directly (instead of DataHub metadata), you'll need to have an **Ingestion Source** configured to your + Data Platform: Snowflake, BigQuery, or Redshift under the **Integrations** tab. Once these are in place, you're ready to create your Volume Assertions! @@ -149,14 +147,14 @@ Once these are in place, you're ready to create your Volume Assertions! 1. Navigate to the Table that to monitor for volume 2. Click the **Validations** tab -

- +

+

3. Click **+ Create Assertion** -

- +

+

4. Choose **Volume** @@ -166,69 +164,63 @@ Once these are in place, you're ready to create your Volume Assertions! 6. Configure the evaluation **condition type**. This determines the cases in which the new assertion will fail when it is evaluated. -

- +

+

7. (Optional) Click **Advanced** to customize the volume **source**. This is the mechanism that will be used to obtain the table row count metric. Each Data Platform supports different options including Information Schema, Query, and DataHub Dataset Profile. -

- +

+

- **Information Schema**: Check the Data Platform system metadata tables to determine the table row count. - **Query**: Issue a `COUNT(*)` query to the table to determine the row count. - **DataHub Dataset Profile**: Use the DataHub Dataset Profile metadata to determine the row count. -8. Click **Next** -9. Configure actions that should be taken when the Volume Assertion passes or fails +8. Configure actions that should be taken when the Volume Assertion passes or fails

- +

- **Raise incident**: Automatically raise a new DataHub `Volume` Incident for the Table whenever the Volume Assertion is failing. This may indicate that the Table is unfit for consumption. Configure Slack Notifications under **Settings** to be notified when an incident is created due to an Assertion failure. + - **Resolve incident**: Automatically resolved any incidents that were raised due to failures in this Volume Assertion. Note that any other incidents will not be impacted. +9. Click **Next** and provide a description. + 10. Click **Save**. And that's it! DataHub will now begin to monitor your Volume Assertion for the table. -To view the time of the next Volume Assertion evaluation, simply click **Volume** and then click on your -new Assertion: - -

- -

- Once your assertion has run, you will begin to see Success or Failure status for the Table -

- +

+

## Stopping a Volume Assertion -In order to temporarily stop the evaluation of a Volume Assertion: +In order to temporarily stop the evaluation of the assertion: 1. Navigate to the **Validations** tab of the Table with the assertion -2. Click **Volume** to open the Volume Assertions list -3. Click the three-dot menu on the right side of the assertion you want to disable -4. Click **Stop** +2. Click **Volume** to open the list of Volume Assertions +3. Click the "Stop" button for the assertion you wish to pause. 

- +

-To resume the Volume Assertion, simply click **Turn On**. +To resume the assertion, simply click **Start**. -

- +

+

@@ -241,7 +233,7 @@ requiring any manual setup. If Acryl DataHub is able to detect a pattern in the volume of a Snowflake, Redshift, BigQuery, or Databricks Table, you'll find a recommended Smart Assertion under the `Validations` tab on the Table profile page: -

+

@@ -253,7 +245,7 @@ Don't need it anymore? Smart Assertions can just as easily be turned off by clic ## Creating Volume Assertions via API -Under the hood, Acryl DataHub implements Volume Assertion Monitoring using two "entity" concepts: +Under the hood, Acryl DataHub implements Volume Assertion Monitoring using two concepts: - **Assertion**: The specific expectation for volume, e.g. "The table was changed int the past 7 hours" or "The table is changed on a schedule of every day by 8am". This is the "what". @@ -266,80 +258,13 @@ Note that to create or delete Assertions and Monitors for a specific entity on D #### GraphQL -In order to create a Volume Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use 2 -GraphQL mutation queries to create a Volume Assertion entity and create an Assertion Monitor entity responsible for evaluating it. - -Start by creating the Volume Assertion entity using the `createVolumeAssertion` query and hang on to the 'urn' field of the Assertion entity -you get back. Then continue by creating a Monitor entity using the `createAssertionMonitor`. +In order to create or update a Volume Assertion, you can use the `upsertDatasetVolumeAssertionMonitor` mutation. ##### Examples -To create a Volume Assertion Entity that checks whether a table has been updated in the past 8 hours: - -```json -mutation createVolumeAssertion { - createVolumeAssertion( - input: { - entityUrn: "", - type: ROW_COUNT_TOTAL, - rowCountTotal: { - operator: BETWEEN, - parameters: { - minValue: { - "value": 10, - "type": NUMBER - }, - maxValue: { - "value": 20, - "type": NUMBER - } - } - } - } - ) { - urn -} -} -``` - -To create an assertion that specifies that the row count total should always fall between 10 and 20. +To create a Volume Assertion Entity that verifies that the row count for a table is between 10 and 20 rows, and runs every 8 hours: -The supported volume assertion types are `ROW_COUNT_TOTAL` and `ROW_COUNT_CHANGE`. 
Other (e.g. incrementing segment) types are not yet supported. -The supported operator types are `GREATER_THAN`, `GREATER_THAN_OR_EQUAL_TO`, `LESS_THAN`, `LESS_THAN_OR_EQUAL_TO`, and `BETWEEN` (requires minValue, maxValue). -The supported parameter types are `NUMBER`. - -To create an Assertion Monitor Entity that evaluates the volume assertion every 8 hours using the Information Schema: - -```json -mutation createAssertionMonitor { - createAssertionMonitor( - input: { - entityUrn: "", - assertionUrn: "", - schedule: { - cron: "0 */8 * * *", - timezone: "America/Los_Angeles" - }, - parameters: { - type: DATASET_VOLUME, - datasetVolumeParameters: { - sourceType: INFORMATION_SCHEMA, - } - } - } - ) { - urn - } -} -``` - -This entity defines _when_ to run the check (Using CRON format - every 8th hour) and _how_ to run the check (using the Information Schema). - -After creating the monitor, the new assertion will start to be evaluated every 8 hours in your selected timezone. - -Alternatively you can use `upsertDatasetVolumeAssertionMonitor` graphql endpoint for creating a Volume Assertion and corresponding Monitor. - -```json +```graphql mutation upsertDatasetVolumeAssertionMonitor { upsertDatasetVolumeAssertionMonitor( input: { @@ -373,9 +298,13 @@ mutation upsertDatasetVolumeAssertionMonitor { } ``` -You can use same endpoint with assertion urn input to update an existing Volume Assertion and corresponding Monitor. +The supported volume assertion types are `ROW_COUNT_TOTAL` and `ROW_COUNT_CHANGE`. Other (e.g. incrementing segment) types are not yet supported. +The supported operator types are `GREATER_THAN`, `GREATER_THAN_OR_EQUAL_TO`, `LESS_THAN`, `LESS_THAN_OR_EQUAL_TO`, and `BETWEEN` (requires minValue, maxValue). +The supported parameter types are `NUMBER`. 
+ +You can use same endpoint with assertion urn input to update an existing Volume Assertion and corresponding Monitor: -```json +```graphql mutation upsertDatasetVolumeAssertionMonitor { upsertDatasetVolumeAssertionMonitor( assertionUrn: "" From b8b7928dd40fae3cdf1a911de02770292c9aa56b Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 13 May 2024 14:37:36 -0500 Subject: [PATCH 14/15] fix(misc): misc fixes for OSS release (#10493) --- datahub-frontend/app/auth/AuthModule.java | 5 +- datahub-frontend/conf/application.conf | 4 +- .../upgrade/system/AbstractMCLStep.java | 8 -- .../metadata/aspect/batch/AspectsBatch.java | 22 ++- .../metadata/aspect/batch/ChangeMCP.java | 22 +++ .../java/com/datahub/util/RecordUtils.java | 2 +- .../entity/ebean/batch/AspectsBatchImpl.java | 18 --- .../entity/ebean/batch/ChangeItemImpl.java | 17 --- .../metadata/client/JavaEntityClient.java | 41 +++++- .../client/SystemJavaEntityClient.java | 6 +- .../metadata/entity/EntityServiceImpl.java | 20 ++- .../metadata/client/JavaEntityClientTest.java | 3 +- .../SampleDataFixtureConfiguration.java | 3 +- .../SearchLineageFixtureConfiguration.java | 3 +- ...eConsumerApplicationTestConfiguration.java | 3 +- .../src/main/resources/application.yaml | 6 + .../entityclient/JavaEntityClientFactory.java | 13 +- .../RestliEntityClientFactory.java | 15 +- .../entity/client/RestliEntityClient.java | 132 +++++++++++------- .../client/SystemRestliEntityClient.java | 5 +- .../common/client/BaseClientTest.java | 6 +- .../client/SystemRestliEntityClientTest.java | 8 +- .../metadata/service/BaseService.java | 15 +- 23 files changed, 237 insertions(+), 140 deletions(-) diff --git a/datahub-frontend/app/auth/AuthModule.java b/datahub-frontend/app/auth/AuthModule.java index 39357e7da12a7..7db8f5689ead5 100644 --- a/datahub-frontend/app/auth/AuthModule.java +++ b/datahub-frontend/app/auth/AuthModule.java @@ -62,6 +62,7 @@ public class AuthModule extends 
AbstractModule { private static final String PAC4J_SESSIONSTORE_PROVIDER_CONF = "pac4j.sessionStore.provider"; private static final String ENTITY_CLIENT_RETRY_INTERVAL = "entityClient.retryInterval"; private static final String ENTITY_CLIENT_NUM_RETRIES = "entityClient.numRetries"; + private static final String ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE = "entityClient.restli.get.batchSize"; private static final String GET_SSO_SETTINGS_ENDPOINT = "auth/getSsoSettings"; private final com.typesafe.config.Config _configs; @@ -201,11 +202,13 @@ protected ConfigurationProvider provideConfigurationProvider() { protected SystemEntityClient provideEntityClient( @Named("systemOperationContext") final OperationContext systemOperationContext, final ConfigurationProvider configurationProvider) { + return new SystemRestliEntityClient( buildRestliClient(), new ExponentialBackoff(_configs.getInt(ENTITY_CLIENT_RETRY_INTERVAL)), _configs.getInt(ENTITY_CLIENT_NUM_RETRIES), - configurationProvider.getCache().getClient().getEntityClient()); + configurationProvider.getCache().getClient().getEntityClient(), + Math.max(1, _configs.getInt(ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE))); } @Provides diff --git a/datahub-frontend/conf/application.conf b/datahub-frontend/conf/application.conf index 0f4ddb7c497e6..6aa58d5b13b2c 100644 --- a/datahub-frontend/conf/application.conf +++ b/datahub-frontend/conf/application.conf @@ -288,4 +288,6 @@ systemClientSecret=${?DATAHUB_SYSTEM_CLIENT_SECRET} entityClient.retryInterval = 2 entityClient.retryInterval = ${?ENTITY_CLIENT_RETRY_INTERVAL} entityClient.numRetries = 3 -entityClient.numRetries = ${?ENTITY_CLIENT_NUM_RETRIES} \ No newline at end of file +entityClient.numRetries = ${?ENTITY_CLIENT_NUM_RETRIES} +entityClient.restli.get.batchSize = 100 +entityClient.restli.get.batchSize = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE} \ No newline at end of file diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java 
b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java index d28b741fedd2a..66cc90f60ed71 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java @@ -125,14 +125,6 @@ public Function executable() { } }); - entityService - .streamRestoreIndices(opContext, args, x -> context.report().addLine((String) x)) - .forEach( - result -> { - context.report().addLine("Rows migrated: " + result.rowsMigrated); - context.report().addLine("Rows ignored: " + result.ignored); - }); - BootstrapStep.setUpgradeResult(opContext, getUpgradeIdUrn(), entityService); context.report().addLine("State updated: " + getUpgradeIdUrn()); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java index 79f3a23c5c5e8..031625da0477c 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java @@ -7,6 +7,7 @@ import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; +import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.List; @@ -15,6 +16,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import org.apache.commons.lang3.StringUtils; /** * A batch of aspects in the context of either an MCP or MCL write path to a data store. 
The item is @@ -191,5 +193,23 @@ static Map> merge( Pair::getValue, Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))); } - String toAbbreviatedString(int maxWidth); + default String toAbbreviatedString(int maxWidth) { + return toAbbreviatedString(getItems(), maxWidth); + } + + static String toAbbreviatedString(Collection items, int maxWidth) { + List itemsAbbreviated = new ArrayList(); + items.forEach( + item -> { + if (item instanceof ChangeMCP) { + itemsAbbreviated.add(((ChangeMCP) item).toAbbreviatedString()); + } else { + itemsAbbreviated.add(item.toString()); + } + }); + return "AspectsBatchImpl{" + + "items=" + + StringUtils.abbreviate(itemsAbbreviated.toString(), maxWidth) + + '}'; + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/ChangeMCP.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/ChangeMCP.java index 19896e2b03544..18c7b477a9df8 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/ChangeMCP.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/ChangeMCP.java @@ -4,8 +4,10 @@ import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.aspect.SystemAspect; import java.lang.reflect.InvocationTargetException; +import java.util.Optional; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import org.apache.commons.lang3.StringUtils; /** * A proposal to write data to the primary datastore which includes system metadata and other @@ -47,4 +49,24 @@ default T getPreviousAspect(Class clazz) { } return null; } + + default String toAbbreviatedString() { + return "ChangeMCP{" + + "changeType=" + + getChangeType() + + ", urn=" + + getUrn() + + ", aspectName='" + + getAspectName() + + '\'' + + ", recordTemplate=" + + Optional.ofNullable(getRecordTemplate()) + .map(template -> StringUtils.abbreviate(template.toString(), 256)) + .orElse("") + + ", systemMetadata=" + + Optional.ofNullable(getSystemMetadata()) + 
.map(systemMetadata -> StringUtils.abbreviate(systemMetadata.toString(), 128)) + .orElse("") + + '}'; + } } diff --git a/li-utils/src/main/java/com/datahub/util/RecordUtils.java b/li-utils/src/main/java/com/datahub/util/RecordUtils.java index d57875f79de61..8183ecc21ee27 100644 --- a/li-utils/src/main/java/com/datahub/util/RecordUtils.java +++ b/li-utils/src/main/java/com/datahub/util/RecordUtils.java @@ -463,7 +463,7 @@ private static Object invokeMethod(@Nonnull RecordTemplate record, @Nonnull Stri METHOD_CACHE.putIfAbsent(record.getClass(), getMethodsFromRecordTemplate(record)); try { return METHOD_CACHE.get(record.getClass()).get(fieldName).invoke(record); - } catch (IllegalAccessException | InvocationTargetException e) { + } catch (NullPointerException | IllegalAccessException | InvocationTargetException e) { throw new RuntimeException( String.format( "Failed to execute method for class [%s], field [%s]", diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java index ad1e26575d7c0..0914df744e413 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java @@ -11,7 +11,6 @@ import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.util.Pair; -import java.util.ArrayList; import java.util.Collection; import java.util.LinkedList; import java.util.List; @@ -23,7 +22,6 @@ import lombok.Builder; import lombok.Getter; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; @Slf4j @Getter @@ -156,20 +154,4 @@ public int hashCode() { public String toString() { return "AspectsBatchImpl{" + "items=" + items + '}'; } - - 
public String toAbbreviatedString(int maxWidth) { - List itemsAbbreviated = new ArrayList(); - items.forEach( - item -> { - if (item instanceof ChangeItemImpl) { - itemsAbbreviated.add(((ChangeItemImpl) item).toAbbreviatedString()); - } else { - itemsAbbreviated.add(item.toString()); - } - }); - return "AspectsBatchImpl{" - + "items=" - + StringUtils.abbreviate(itemsAbbreviated.toString(), maxWidth) - + '}'; - } } diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java index d6c12f2dffc91..2f3bce6e75e14 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java @@ -31,7 +31,6 @@ import lombok.Setter; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; @Slf4j @Getter @@ -250,20 +249,4 @@ public String toString() { + systemMetadata + '}'; } - - public String toAbbreviatedString() { - return "ChangeItemImpl{" - + "changeType=" - + changeType - + ", urn=" - + urn - + ", aspectName='" - + aspectName - + '\'' - + ", recordTemplate=" - + StringUtils.abbreviate(recordTemplate.toString(), 256) - + ", systemMetadata=" - + StringUtils.abbreviate(systemMetadata.toString(), 128) - + '}'; - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index 5006788fa9d76..ec25a2fee76d5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -7,6 +7,7 @@ import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableList; import 
com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterators; import com.linkedin.aspect.GetTimeseriesAspectValuesResponse; import com.linkedin.common.AuditStamp; import com.linkedin.common.VersionedUrn; @@ -59,6 +60,8 @@ import java.net.URISyntaxException; import java.time.Clock; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -90,6 +93,7 @@ public class JavaEntityClient implements EntityClient { private final TimeseriesAspectService timeseriesAspectService; private final RollbackService rollbackService; private final EventProducer eventProducer; + private final int batchGetV2Size; @Override @Nullable @@ -121,7 +125,22 @@ public Map batchGetV2( throws RemoteInvocationException, URISyntaxException { final Set projectedAspects = aspectNames == null ? opContext.getEntityAspectNames(entityName) : aspectNames; - return entityService.getEntitiesV2(opContext, entityName, urns, projectedAspects); + + Map responseMap = new HashMap<>(); + + Iterators.partition(urns.iterator(), Math.max(1, batchGetV2Size)) + .forEachRemaining( + batch -> { + try { + responseMap.putAll( + entityService.getEntitiesV2( + opContext, entityName, new HashSet<>(batch), projectedAspects)); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + }); + + return responseMap; } @Override @@ -130,11 +149,25 @@ public Map batchGetVersionedV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Set versionedUrns, - @Nullable final Set aspectNames) - throws RemoteInvocationException, URISyntaxException { + @Nullable final Set aspectNames) { final Set projectedAspects = aspectNames == null ? 
opContext.getEntityAspectNames(entityName) : aspectNames; - return entityService.getEntitiesVersionedV2(opContext, versionedUrns, projectedAspects); + + Map responseMap = new HashMap<>(); + + Iterators.partition(versionedUrns.iterator(), Math.max(1, batchGetV2Size)) + .forEachRemaining( + batch -> { + try { + responseMap.putAll( + entityService.getEntitiesVersionedV2( + opContext, new HashSet<>(batch), projectedAspects)); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + }); + + return responseMap; } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java index deaf3e835615a..ab68abc69bce7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java @@ -42,7 +42,8 @@ public SystemJavaEntityClient( TimeseriesAspectService timeseriesAspectService, RollbackService rollbackService, EventProducer eventProducer, - EntityClientCacheConfig cacheConfig) { + EntityClientCacheConfig cacheConfig, + int batchGetV2Size) { super( entityService, deleteEntityService, @@ -52,7 +53,8 @@ public SystemJavaEntityClient( lineageSearchService, timeseriesAspectService, rollbackService, - eventProducer); + eventProducer, + batchGetV2Size); this.operationContextMap = CacheBuilder.newBuilder().maximumSize(500).build(); this.entityClientCache = buildEntityClientCache(SystemJavaEntityClient.class, cacheConfig); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 01ed02ae848ef..0093921a83f9e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -666,14 +666,8 @@ public 
List ingestAspects( return Collections.emptyList(); } - log.info("Ingesting aspects batch to database: {}", aspectsBatch.toAbbreviatedString(2048)); - Timer.Context ingestToLocalDBTimer = - MetricUtils.timer(this.getClass(), "ingestAspectsToLocalDB").time(); List ingestResults = ingestAspectsToLocalDB(opContext, aspectsBatch, overwrite); - long took = ingestToLocalDBTimer.stop(); - log.info( - "Ingestion of aspects batch to database took {} ms", TimeUnit.NANOSECONDS.toMillis(took)); List mclResults = emitMCL(opContext, ingestResults, emitMCL); return mclResults; @@ -778,7 +772,17 @@ private List ingestAspectsToLocalDB( throw new ValidationException(exceptions.toString()); } + // No changes, return + if (changeMCPs.isEmpty()) { + return Collections.emptyList(); + } + // Database Upsert results + log.info( + "Ingesting aspects batch to database: {}", + AspectsBatch.toAbbreviatedString(changeMCPs, 2048)); + Timer.Context ingestToLocalDBTimer = + MetricUtils.timer(this.getClass(), "ingestAspectsToLocalDB").time(); List upsertResults = changeMCPs.stream() .map( @@ -827,6 +831,10 @@ private List ingestAspectsToLocalDB( if (tx != null) { tx.commitAndContinue(); } + long took = ingestToLocalDBTimer.stop(); + log.info( + "Ingestion of aspects batch to database took {} ms", + TimeUnit.NANOSECONDS.toMillis(took)); // Retention optimization and tx if (retentionService != null) { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/client/JavaEntityClientTest.java b/metadata-io/src/test/java/com/linkedin/metadata/client/JavaEntityClientTest.java index 3a10875d1a60a..2ca966b104e03 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/client/JavaEntityClientTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/client/JavaEntityClientTest.java @@ -70,7 +70,8 @@ private JavaEntityClient getJavaEntityClient() { _lineageSearchService, _timeseriesAspectService, rollbackService, - _eventProducer); + _eventProducer, + 1); } @Test diff --git 
a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index 60d1333be272d..5da970b46afc7 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -315,6 +315,7 @@ private EntityClient entityClientHelper( null, null, null, - null); + null, + 1); } } diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index d2bc670ac64a0..34598821f43fd 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -250,6 +250,7 @@ protected EntityClient entityClient( null, null, null, - null); + null, + 1); } } diff --git a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java index feb3869abd391..08ff802c37e40 100644 --- a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java +++ b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java @@ -46,7 +46,8 @@ public SystemEntityClient systemEntityClient( restClient, new ExponentialBackoff(1), 1, - configurationProvider.getCache().getClient().getEntityClient()); + configurationProvider.getCache().getClient().getEntityClient(), + 1); } @MockBean public Database ebeanServer; diff --git 
a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index c6397c3ce5abb..27ccd8851fdf0 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -380,6 +380,12 @@ views: entityClient: retryInterval: ${ENTITY_CLIENT_RETRY_INTERVAL:2} numRetries: ${ENTITY_CLIENT_NUM_RETRIES:3} + java: + get: + batchSize: ${ENTITY_CLIENT_JAVA_GET_BATCH_SIZE:375} # matches EbeanAspectDao batch size + restli: + get: + batchSize: ${ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE:100} # limited to prevent exceeding restli URI size limit usageClient: retryInterval: ${USAGE_CLIENT_RETRY_INTERVAL:2} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/JavaEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/JavaEntityClientFactory.java index 2f92f0ad5bf9f..fc35e6d045d0c 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/JavaEntityClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/JavaEntityClientFactory.java @@ -16,6 +16,7 @@ import com.linkedin.metadata.timeseries.TimeseriesAspectService; import javax.inject.Singleton; import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -37,7 +38,8 @@ public EntityClient entityClient( final @Qualifier("timeseriesAspectService") TimeseriesAspectService _timeseriesAspectService, final @Qualifier("relationshipSearchService") LineageSearchService _lineageSearchService, final @Qualifier("kafkaEventProducer") EventProducer _eventProducer, - final 
RollbackService rollbackService) { + final RollbackService rollbackService, + final @Value("${entityClient.restli.get.batchSize:375}") int batchGetV2Size) { return new JavaEntityClient( _entityService, _deleteEntityService, @@ -47,7 +49,8 @@ public EntityClient entityClient( _lineageSearchService, _timeseriesAspectService, rollbackService, - _eventProducer); + _eventProducer, + batchGetV2Size); } @Bean("systemEntityClient") @@ -63,7 +66,8 @@ public SystemEntityClient systemEntityClient( final @Qualifier("relationshipSearchService") LineageSearchService _lineageSearchService, final @Qualifier("kafkaEventProducer") EventProducer _eventProducer, final RollbackService rollbackService, - final EntityClientCacheConfig entityClientCacheConfig) { + final EntityClientCacheConfig entityClientCacheConfig, + final @Value("${entityClient.restli.get.batchSize:375}") int batchGetV2Size) { return new SystemJavaEntityClient( _entityService, _deleteEntityService, @@ -74,6 +78,7 @@ public SystemEntityClient systemEntityClient( _timeseriesAspectService, rollbackService, _eventProducer, - entityClientCacheConfig); + entityClientCacheConfig, + batchGetV2Size); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/RestliEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/RestliEntityClientFactory.java index 9da7fc706d08a..2d9f570e1b07d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/RestliEntityClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/RestliEntityClientFactory.java @@ -29,7 +29,8 @@ public EntityClient entityClient( @Value("${datahub.gms.uri}") String gmsUri, @Value("${datahub.gms.sslContext.protocol}") String gmsSslProtocol, @Value("${entityClient.retryInterval:2}") int retryInterval, - @Value("${entityClient.numRetries:3}") int numRetries) { + @Value("${entityClient.numRetries:3}") int 
numRetries, + final @Value("${entityClient.restli.get.batchSize:150}") int batchGetV2Size) { final Client restClient; if (gmsUri != null) { restClient = DefaultRestliClientFactory.getRestLiClient(URI.create(gmsUri), gmsSslProtocol); @@ -37,7 +38,8 @@ public EntityClient entityClient( restClient = DefaultRestliClientFactory.getRestLiClient(gmsHost, gmsPort, gmsUseSSL, gmsSslProtocol); } - return new RestliEntityClient(restClient, new ExponentialBackoff(retryInterval), numRetries); + return new RestliEntityClient( + restClient, new ExponentialBackoff(retryInterval), numRetries, batchGetV2Size); } @Bean("systemEntityClient") @@ -50,7 +52,8 @@ public SystemEntityClient systemEntityClient( @Value("${datahub.gms.sslContext.protocol}") String gmsSslProtocol, @Value("${entityClient.retryInterval:2}") int retryInterval, @Value("${entityClient.numRetries:3}") int numRetries, - final EntityClientCacheConfig entityClientCacheConfig) { + final EntityClientCacheConfig entityClientCacheConfig, + final @Value("${entityClient.restli.get.batchSize:150}") int batchGetV2Size) { final Client restClient; if (gmsUri != null) { @@ -60,6 +63,10 @@ public SystemEntityClient systemEntityClient( DefaultRestliClientFactory.getRestLiClient(gmsHost, gmsPort, gmsUseSSL, gmsSslProtocol); } return new SystemRestliEntityClient( - restClient, new ExponentialBackoff(retryInterval), numRetries, entityClientCacheConfig); + restClient, + new ExponentialBackoff(retryInterval), + numRetries, + entityClientCacheConfig, + batchGetV2Size); } } diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index 21246407f2029..70fae208ad77a 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -3,6 +3,7 @@ 
import com.datahub.plugins.auth.authorization.Authorizer; import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterators; import com.linkedin.common.VersionedUrn; import com.linkedin.common.client.BaseClient; import com.linkedin.common.urn.Urn; @@ -108,11 +109,15 @@ public class RestliEntityClient extends BaseClient implements EntityClient { new PlatformRequestBuilders(); private static final RunsRequestBuilders RUNS_REQUEST_BUILDERS = new RunsRequestBuilders(); + private final int batchGetV2Size; + public RestliEntityClient( @Nonnull final Client restliClient, @Nonnull final BackoffPolicy backoffPolicy, - int retryCount) { + int retryCount, + int batchGetV2Size) { super(restliClient, backoffPolicy, retryCount); + this.batchGetV2Size = Math.max(1, batchGetV2Size); } @Override @@ -195,10 +200,10 @@ public Map batchGet( /** * Batch get a set of aspects for multiple entities. * + * @param opContext operation's context * @param entityName the entity type to fetch * @param urns the urns of the entities to batch get * @param aspectNames the aspect names to batch get - * @param authentication the authentication to include in the request to the Metadata Service * @throws RemoteInvocationException when unable to execute request */ @Override @@ -210,29 +215,43 @@ public Map batchGetV2( @Nullable final Set aspectNames) throws RemoteInvocationException, URISyntaxException { - final EntitiesV2BatchGetRequestBuilder requestBuilder = - ENTITIES_V2_REQUEST_BUILDERS - .batchGet() - .aspectsParam(aspectNames) - .ids(urns.stream().map(Urn::toString).collect(Collectors.toList())); - - return sendClientRequest(requestBuilder, opContext.getSessionAuthentication()) - .getEntity() - .getResults() - .entrySet() - .stream() - .collect( - Collectors.toMap( - entry -> { - try { - return Urn.createFromString(entry.getKey()); - } catch (URISyntaxException e) { - throw new RuntimeException( - String.format( - "Failed to bind urn 
string with value %s into urn", entry.getKey())); - } - }, - entry -> entry.getValue().getEntity())); + Map responseMap = new HashMap<>(); + + Iterators.partition(urns.iterator(), batchGetV2Size) + .forEachRemaining( + batch -> { + try { + final EntitiesV2BatchGetRequestBuilder requestBuilder = + ENTITIES_V2_REQUEST_BUILDERS + .batchGet() + .aspectsParam(aspectNames) + .ids(batch.stream().map(Urn::toString).collect(Collectors.toList())); + + responseMap.putAll( + sendClientRequest(requestBuilder, opContext.getSessionAuthentication()) + .getEntity() + .getResults() + .entrySet() + .stream() + .collect( + Collectors.toMap( + entry -> { + try { + return Urn.createFromString(entry.getKey()); + } catch (URISyntaxException e) { + throw new RuntimeException( + String.format( + "Failed to bind urn string with value %s into urn", + entry.getKey())); + } + }, + entry -> entry.getValue().getEntity()))); + } catch (RemoteInvocationException e) { + throw new RuntimeException(e); + } + }); + + return responseMap; } /** @@ -250,31 +269,44 @@ public Map batchGetVersionedV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Set versionedUrns, - @Nullable final Set aspectNames) - throws RemoteInvocationException, URISyntaxException { - - final EntitiesVersionedV2BatchGetRequestBuilder requestBuilder = - ENTITIES_VERSIONED_V2_REQUEST_BUILDERS - .batchGet() - .aspectsParam(aspectNames) - .entityTypeParam(entityName) - .ids( - versionedUrns.stream() - .map( - versionedUrn -> - com.linkedin.common.urn.VersionedUrn.of( - versionedUrn.getUrn().toString(), versionedUrn.getVersionStamp())) - .collect(Collectors.toSet())); - - return sendClientRequest(requestBuilder, opContext.getSessionAuthentication()) - .getEntity() - .getResults() - .entrySet() - .stream() - .collect( - Collectors.toMap( - entry -> UrnUtils.getUrn(entry.getKey().getUrn()), - entry -> entry.getValue().getEntity())); + @Nullable final Set aspectNames) { + + Map responseMap = new HashMap<>(); + 
+ Iterators.partition(versionedUrns.iterator(), batchGetV2Size) + .forEachRemaining( + batch -> { + final EntitiesVersionedV2BatchGetRequestBuilder requestBuilder = + ENTITIES_VERSIONED_V2_REQUEST_BUILDERS + .batchGet() + .aspectsParam(aspectNames) + .entityTypeParam(entityName) + .ids( + batch.stream() + .map( + versionedUrn -> + com.linkedin.common.urn.VersionedUrn.of( + versionedUrn.getUrn().toString(), + versionedUrn.getVersionStamp())) + .collect(Collectors.toSet())); + + try { + responseMap.putAll( + sendClientRequest(requestBuilder, opContext.getSessionAuthentication()) + .getEntity() + .getResults() + .entrySet() + .stream() + .collect( + Collectors.toMap( + entry -> UrnUtils.getUrn(entry.getKey().getUrn()), + entry -> entry.getValue().getEntity()))); + } catch (RemoteInvocationException e) { + throw new RuntimeException(e); + } + }); + + return responseMap; } /** diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java index 92c20c750c257..364ee9b0519d2 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java @@ -26,8 +26,9 @@ public SystemRestliEntityClient( @Nonnull final Client restliClient, @Nonnull final BackoffPolicy backoffPolicy, int retryCount, - EntityClientCacheConfig cacheConfig) { - super(restliClient, backoffPolicy, retryCount); + EntityClientCacheConfig cacheConfig, + int batchGetV2Size) { + super(restliClient, backoffPolicy, retryCount, batchGetV2Size); this.operationContextMap = CacheBuilder.newBuilder().maximumSize(500).build(); this.entityClientCache = buildEntityClientCache(SystemRestliEntityClient.class, cacheConfig); } diff --git 
a/metadata-service/restli-client/src/test/java/com/linkedin/common/client/BaseClientTest.java b/metadata-service/restli-client/src/test/java/com/linkedin/common/client/BaseClientTest.java index 1f8342170a2ff..474bb24f9e16b 100644 --- a/metadata-service/restli-client/src/test/java/com/linkedin/common/client/BaseClientTest.java +++ b/metadata-service/restli-client/src/test/java/com/linkedin/common/client/BaseClientTest.java @@ -37,7 +37,7 @@ public void testZeroRetry() throws RemoteInvocationException { when(mockRestliClient.sendRequest(any(ActionRequest.class))).thenReturn(mockFuture); RestliEntityClient testClient = - new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 0); + new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 0, 10); testClient.sendClientRequest(testRequestBuilder, AUTH); // Expected 1 actual try and 0 retries verify(mockRestliClient).sendRequest(any(ActionRequest.class)); @@ -56,7 +56,7 @@ public void testMultipleRetries() throws RemoteInvocationException { .thenReturn(mockFuture); RestliEntityClient testClient = - new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 1); + new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 1, 10); testClient.sendClientRequest(testRequestBuilder, AUTH); // Expected 1 actual try and 1 retries verify(mockRestliClient, times(2)).sendRequest(any(ActionRequest.class)); @@ -73,7 +73,7 @@ public void testNonRetry() { .thenThrow(new RuntimeException(new RequiredFieldNotPresentException("value"))); RestliEntityClient testClient = - new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 1); + new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 1, 10); assertThrows( RuntimeException.class, () -> testClient.sendClientRequest(testRequestBuilder, AUTH)); } diff --git a/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java 
b/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java index e44acf06386c5..75614ca998f6a 100644 --- a/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java +++ b/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java @@ -45,7 +45,7 @@ public void testCache() throws RemoteInvocationException, URISyntaxException { noCacheConfig.setEnabled(true); SystemRestliEntityClient noCacheTest = - new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig); + new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig, 1); com.linkedin.entity.EntityResponse responseStatusTrue = buildStatusResponse(true); com.linkedin.entity.EntityResponse responseStatusFalse = buildStatusResponse(false); @@ -83,7 +83,7 @@ public void testCache() throws RemoteInvocationException, URISyntaxException { Map.of(TEST_URN.getEntityType(), Map.of(Constants.STATUS_ASPECT_NAME, 60))); SystemRestliEntityClient cacheTest = - new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, cacheConfig); + new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, cacheConfig, 1); mockResponse(mockRestliClient, responseStatusTrue); assertEquals( @@ -117,7 +117,7 @@ public void testBatchCache() throws RemoteInvocationException, URISyntaxExceptio noCacheConfig.setEnabled(true); SystemRestliEntityClient noCacheTest = - new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig); + new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig, 1); com.linkedin.entity.EntityResponse responseStatusTrue = buildStatusResponse(true); com.linkedin.entity.EntityResponse responseStatusFalse = buildStatusResponse(false); @@ -155,7 +155,7 @@ public void testBatchCache() throws RemoteInvocationException, URISyntaxExceptio 
Map.of(TEST_URN.getEntityType(), Map.of(Constants.STATUS_ASPECT_NAME, 60))); SystemRestliEntityClient cacheTest = - new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, cacheConfig); + new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, cacheConfig, 1); mockResponse(mockRestliClient, responseStatusTrue); assertEquals( diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java index 2c4ea4a634c76..3f9022b634c67 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java @@ -36,8 +36,7 @@ protected Map getTagsAspects( @Nonnull OperationContext opContext, @Nonnull Set entityUrns, @Nonnull GlobalTags defaultValue) { - - if (entityUrns.size() <= 0) { + if (entityUrns.isEmpty()) { return Collections.emptyMap(); } @@ -75,8 +74,7 @@ protected Map getEditableSchemaMetadataAspects( @Nonnull OperationContext opContext, @Nonnull Set entityUrns, @Nonnull EditableSchemaMetadata defaultValue) { - - if (entityUrns.size() <= 0) { + if (entityUrns.isEmpty()) { return Collections.emptyMap(); } @@ -114,8 +112,7 @@ protected Map getOwnershipAspects( @Nonnull OperationContext opContext, @Nonnull Set entityUrns, @Nonnull Ownership defaultValue) { - - if (entityUrns.size() <= 0) { + if (entityUrns.isEmpty()) { return Collections.emptyMap(); } @@ -153,8 +150,7 @@ protected Map getGlossaryTermsAspects( @Nonnull OperationContext opContext, @Nonnull Set entityUrns, @Nonnull GlossaryTerms defaultValue) { - - if (entityUrns.size() <= 0) { + if (entityUrns.isEmpty()) { return Collections.emptyMap(); } @@ -192,8 +188,7 @@ protected Map getDomainsAspects( @Nonnull OperationContext opContext, @Nonnull Set entityUrns, @Nonnull Domains defaultValue) { - - if (entityUrns.size() <= 0) { + if 
(entityUrns.isEmpty()) { return Collections.emptyMap(); } From 3dc5326e440b2479b06fed9362da0f4c7713b635 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Tue, 14 May 2024 08:57:53 +0900 Subject: [PATCH 15/15] docs: sort feature section alphabetically (#10400) --- docs-website/sidebars.js | 171 ++++++++++++------ docs/_feature-guide-template.md | 2 +- docs/act-on-metadata/impact-analysis.md | 2 +- docs/authentication/personal-access-tokens.md | 2 +- docs/authorization/access-policies-guide.md | 2 +- docs/authorization/roles.md | 2 +- docs/domains.md | 2 +- .../dataset-usage-and-query-history.md | 2 +- docs/glossary/business-glossary.md | 2 +- docs/how/search.md | 2 +- docs/incidents/incidents.md | 6 +- docs/managed-datahub/approval-workflows.md | 2 +- docs/posts.md | 2 +- docs/schema-history.md | 2 +- docs/sync-status.md | 2 +- docs/tags.md | 2 +- 16 files changed, 134 insertions(+), 71 deletions(-) diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 326bf804a3f92..64ded2f61c16f 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -43,75 +43,138 @@ module.exports = { description: "Learn about the features of DataHub.", }, items: [ - "docs/ui-ingestion", - "docs/how/search", - "docs/schema-history", // "docs/how/ui-tabs-guide", - "docs/domains", - "docs/dataproducts", - "docs/glossary/business-glossary", - "docs/tags", - "docs/ownership/ownership-types", - "docs/authorization/access-policies-guide", - "docs/features/dataset-usage-and-query-history", - "docs/posts", - "docs/sync-status", - "docs/incidents/incidents", - "docs/generated/lineage/lineage-feature-guide", - "docs/businessattributes", { + label: "Assertions", + type: "category", + link: { type: "doc", id: "docs/managed-datahub/observe/assertions" }, + items: [ + { + label: "Column Assertions", + type: "doc", + id: "docs/managed-datahub/observe/column-assertions", + className: "saasOnly", + }, + { + label: "Custom SQL Assertions", + type: "doc", + id: 
"docs/managed-datahub/observe/custom-sql-assertions", + className: "saasOnly", + }, + { + label: "Freshness Assertions", + type: "doc", + id: "docs/managed-datahub/observe/freshness-assertions", + className: "saasOnly", + }, + { + label: "Volume Assertions", + type: "doc", + id: "docs/managed-datahub/observe/volume-assertions", + className: "saasOnly", + }, + ], + }, + { + label: "Business Attributes", type: "doc", - id: "docs/tests/metadata-tests", - className: "saasOnly", + id: "docs/businessattributes", + }, + { + label: "Business Glossary", + type: "doc", + id: "docs/glossary/business-glossary", + }, + { + label: "Data Contract", + type: "doc", + id: "docs/managed-datahub/observe/data-contract", + }, + { + label: "Data Products", + type: "doc", + id: "docs/dataproducts", + }, + { + label: "Dataset Usage and Query History", + type: "doc", + id: "docs/features/dataset-usage-and-query-history", + }, + { + label: "Domains", + type: "doc", + id: "docs/domains", + }, + { + label: "Incidents", + type: "doc", + id: "docs/incidents/incidents", + }, + { + label: "Ingestion", + type: "doc", + id: "docs/ui-ingestion", }, - "docs/act-on-metadata/impact-analysis", { - label: "Observability", + label: "Lineage", type: "category", + link: { + type: "doc", + id: "docs/generated/lineage/lineage-feature-guide", + }, items: [ { - label: "Assertions", - type: "category", - link: { - type: "doc", - id: "docs/managed-datahub/observe/assertions", - }, - items: [ - { - type: "doc", - id: "docs/managed-datahub/observe/freshness-assertions", - className: "saasOnly", - }, - { - type: "doc", - id: "docs/managed-datahub/observe/volume-assertions", - className: "saasOnly", - }, - { - type: "doc", - id: "docs/managed-datahub/observe/custom-sql-assertions", - className: "saasOnly", - }, - { - type: "doc", - id: "docs/managed-datahub/observe/column-assertions", - className: "saasOnly", - }, - { - type: "doc", - id: "docs/managed-datahub/observe/schema-assertions", - className: "saasOnly", - }, 
- ], + label: "Lineage Impact analysis", + type: "doc", + id: "docs/act-on-metadata/impact-analysis", }, { + label: "Managing Lineage via UI", type: "doc", - id: "docs/managed-datahub/observe/data-contract", + id: "docs/features/feature-guides/ui-lineage", }, ], }, { - Guides: ["docs/features/feature-guides/ui-lineage"], + label: "Metadata Tests", + type: "doc", + id: "docs/tests/metadata-tests", + className: "saasOnly", + }, + { + label: "Ownership", + type: "doc", + id: "docs/ownership/ownership-types", + }, + { + label: "Policies", + type: "doc", + id: "docs/authorization/access-policies-guide", + }, + { + label: "Posts", + type: "doc", + id: "docs/posts", + }, + { + label: "Schema history", + type: "doc", + id: "docs/schema-history", + }, + { + label: "Search", + type: "doc", + id: "docs/how/search", + }, + { + label: "Sync Status", + type: "doc", + id: "docs/sync-status", + }, + { + label: "Tags", + type: "doc", + id: "docs/tags", }, ], }, diff --git a/docs/_feature-guide-template.md b/docs/_feature-guide-template.md index 63ba258d52d0b..9c1aead5e13ab 100644 --- a/docs/_feature-guide-template.md +++ b/docs/_feature-guide-template.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub [Feature Name] +# [Feature Name] diff --git a/docs/act-on-metadata/impact-analysis.md b/docs/act-on-metadata/impact-analysis.md index ae593d09c255f..3dbf532b2dd84 100644 --- a/docs/act-on-metadata/impact-analysis.md +++ b/docs/act-on-metadata/impact-analysis.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Lineage Impact Analysis +# Lineage Impact Analysis diff --git a/docs/authentication/personal-access-tokens.md b/docs/authentication/personal-access-tokens.md index ad81caef66f8f..8488163d85d15 100644 --- a/docs/authentication/personal-access-tokens.md +++ b/docs/authentication/personal-access-tokens.md @@ -1,6 +1,6 @@ import FeatureAvailability from 
'@site/src/components/FeatureAvailability'; -# About DataHub Personal Access Tokens +# Personal Access Tokens diff --git a/docs/authorization/access-policies-guide.md b/docs/authorization/access-policies-guide.md index b8c23b0cd79b0..a9a54a762cd81 100644 --- a/docs/authorization/access-policies-guide.md +++ b/docs/authorization/access-policies-guide.md @@ -1,4 +1,4 @@ -# About DataHub Access Policies +# Access Policies diff --git a/docs/authorization/roles.md b/docs/authorization/roles.md index b25579072980d..7e2f1797309df 100644 --- a/docs/authorization/roles.md +++ b/docs/authorization/roles.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Roles +# Roles diff --git a/docs/domains.md b/docs/domains.md index afaec796d55df..98e2577387037 100644 --- a/docs/domains.md +++ b/docs/domains.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Domains +# Domains diff --git a/docs/features/dataset-usage-and-query-history.md b/docs/features/dataset-usage-and-query-history.md index 2d06b932572b7..37cbc16cfe74f 100644 --- a/docs/features/dataset-usage-and-query-history.md +++ b/docs/features/dataset-usage-and-query-history.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Dataset Usage & Query History +# Dataset Usage & Query History diff --git a/docs/glossary/business-glossary.md b/docs/glossary/business-glossary.md index e10cbed30b913..9c9daabcb94c7 100644 --- a/docs/glossary/business-glossary.md +++ b/docs/glossary/business-glossary.md @@ -4,7 +4,7 @@ title: Business Glossary import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Business Glossary +# Business Glossary diff --git a/docs/how/search.md b/docs/how/search.md index 3b50a0da7fec4..7012f5321f2ff 100644 --- a/docs/how/search.md +++ b/docs/how/search.md @@ -1,6 +1,6 @@ import FeatureAvailability 
from '@site/src/components/FeatureAvailability'; -# About DataHub Search +# Search diff --git a/docs/incidents/incidents.md b/docs/incidents/incidents.md index 5f51e421aad3b..578571289cd2e 100644 --- a/docs/incidents/incidents.md +++ b/docs/incidents/incidents.md @@ -4,7 +4,7 @@ description: This page provides an overview of working with the DataHub Incident import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About Incidents +# Incidents @@ -14,8 +14,8 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; A couple scenarios in which incidents can be useful are -1**Communicating Assets with Ongoing Issues**: You can mark a known-bad data asset as under an ongoing incident so consumers and stakeholders can be informed about the health status of a data asset via the DataHub UI. Moreover, they can follow the incident as it progresses toward resolution. -2**Pipeline Circuit Breaking (advanced):** You can use Incidents as a basis for orchestrating and blocking data pipelines that have inputs with active issues to avoid propagating bad data downstream. +1. **Communicating Assets with Ongoing Issues**: You can mark a known-bad data asset as under an ongoing incident so consumers and stakeholders can be informed about the health status of a data asset via the DataHub UI. Moreover, they can follow the incident as it progresses toward resolution. +2. **Pipeline Circuit Breaking (advanced):** You can use Incidents as a basis for orchestrating and blocking data pipelines that have inputs with active issues to avoid propagating bad data downstream. 
In the next section, we'll walk through how to diff --git a/docs/managed-datahub/approval-workflows.md b/docs/managed-datahub/approval-workflows.md index 3853a7c37817f..75cab458d285d 100644 --- a/docs/managed-datahub/approval-workflows.md +++ b/docs/managed-datahub/approval-workflows.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Approval Workflows +# Approval Workflows diff --git a/docs/posts.md b/docs/posts.md index cdaf9d4325d0f..c44125bbd0017 100644 --- a/docs/posts.md +++ b/docs/posts.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Posts +# Posts DataHub allows users to make Posts that can be displayed on the app. Currently, Posts are only supported on the Home Page, but may be extended to other surfaces of the app in the future. Posts can be used to accomplish the following: diff --git a/docs/schema-history.md b/docs/schema-history.md index 120d041960186..e57b550dd98a7 100644 --- a/docs/schema-history.md +++ b/docs/schema-history.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Schema History +# Schema History diff --git a/docs/sync-status.md b/docs/sync-status.md index a249a324e561c..2e9fbcdb5b7bd 100644 --- a/docs/sync-status.md +++ b/docs/sync-status.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Sync Status +# Sync Status diff --git a/docs/tags.md b/docs/tags.md index f626ae79b1a0f..880e57f8d0a4f 100644 --- a/docs/tags.md +++ b/docs/tags.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Tags +# Tags