diff --git a/docs-website/src/styles/global.scss b/docs-website/src/styles/global.scss
index e256c752b4a0b..96ca07d45d0c2 100644
--- a/docs-website/src/styles/global.scss
+++ b/docs-website/src/styles/global.scss
@@ -31,7 +31,7 @@
--ifm-navbar-item-padding-horizontal: 1rem;
/* Announcement Bar */
- --docusaurus-announcement-bar-height: 60px !important;
+ --docusaurus-announcement-bar-height: 48px !important;
/* Rule */
--ifm-hr-border-width: 1px 0 0 0;
@@ -141,8 +141,9 @@ div[class^="announcementBar"] {
}
a {
- color: var(--ifm-button-color);
+ color: #EFB300;
text-decoration: none;
+ font-size: 1rem
}
}
}
diff --git a/docs/advanced/mcp-mcl.md b/docs/advanced/mcp-mcl.md
index 9efb9b794954d..333891ba1a95d 100644
--- a/docs/advanced/mcp-mcl.md
+++ b/docs/advanced/mcp-mcl.md
@@ -14,6 +14,18 @@ To mitigate these downsides, we are committed to providing cross-language client
Ultimately, we intend to realize a state in which the Entities and Aspect schemas can be altered without requiring generated code and without maintaining a single mega-model schema (looking at you, Snapshot.pdl). The intention is that changes to the metadata model become even easier than they are today.
+### Synchronous Ingestion Architecture
+
+
+
+
+
+### Asynchronous Ingestion Architecture
+
+
+
+
+
## Modeling
A Metadata Change Proposal is defined (in PDL) as follows
diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md
index 00e020bd2a387..dbcc7da846703 100644
--- a/docs/how/updating-datahub.md
+++ b/docs/how/updating-datahub.md
@@ -24,6 +24,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
- #11484 - Metadata service authentication enabled by default
- #11484 - Rest API authorization enabled by default
- #10472 - `SANDBOX` added as a FabricType. No rollbacks allowed once metadata with this fabric type is added without manual cleanups in databases.
+- #11619 - schema field/column paths can no longer be empty strings
+- #11619 - schema field/column paths can no longer be duplicated within the schema
### Potential Downtime
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/FieldPathValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/FieldPathValidator.java
new file mode 100644
index 0000000000000..7c279254e1bc3
--- /dev/null
+++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/FieldPathValidator.java
@@ -0,0 +1,116 @@
+package com.linkedin.metadata.aspect.validation;
+
+import static com.linkedin.metadata.Constants.*;
+
+import com.linkedin.metadata.aspect.RetrieverContext;
+import com.linkedin.metadata.aspect.batch.BatchItem;
+import com.linkedin.metadata.aspect.batch.ChangeMCP;
+import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig;
+import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator;
+import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException;
+import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection;
+import com.linkedin.schema.EditableSchemaFieldInfo;
+import com.linkedin.schema.EditableSchemaMetadata;
+import com.linkedin.schema.SchemaField;
+import com.linkedin.schema.SchemaMetadata;
+import java.util.Collection;
+import java.util.Optional;
+import java.util.stream.Stream;
+import javax.annotation.Nonnull;
+import lombok.Getter;
+import lombok.Setter;
+import lombok.experimental.Accessors;
+
+/**
+ * 1. Validates the Schema Field Path specification, specifically that all field IDs must be unique
+ * across all fields within a schema. 2. Validates that the field path id is not empty.
+ *
+ * @see
Field
+ * Path V2 docs
+ */
+@Setter
+@Getter
+@Accessors(chain = true)
+public class FieldPathValidator extends AspectPayloadValidator {
+ @Nonnull private AspectPluginConfig config;
+
+ /** Prevent any MCP for SchemaMetadata where field ids are duplicated. */
+ @Override
+ protected Stream
validateProposedAspects(
+ @Nonnull Collection extends BatchItem> mcpItems,
+ @Nonnull RetrieverContext retrieverContext) {
+
+ ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection();
+
+ mcpItems.forEach(
+ i -> {
+ if (i.getAspectName().equals(SCHEMA_METADATA_ASPECT_NAME)) {
+ processSchemaMetadataAspect(i, exceptions);
+ } else {
+ processEditableSchemaMetadataAspect(i, exceptions);
+ }
+ });
+
+ return exceptions.streamAllExceptions();
+ }
+
+ @Override
+ protected Stream validatePreCommitAspects(
+ @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) {
+ return Stream.of();
+ }
+
+ private static void processEditableSchemaMetadataAspect(
+ BatchItem i, ValidationExceptionCollection exceptions) {
+ final EditableSchemaMetadata schemaMetadata = i.getAspect(EditableSchemaMetadata.class);
+ final long uniquePaths =
+ validateAndCount(
+ i,
+ schemaMetadata.getEditableSchemaFieldInfo().stream()
+ .map(EditableSchemaFieldInfo::getFieldPath),
+ exceptions);
+
+ if (uniquePaths != schemaMetadata.getEditableSchemaFieldInfo().size()) {
+ exceptions.addException(
+ i,
+ String.format(
+ "Cannot perform %s action on proposal. EditableSchemaMetadata aspect has duplicated field paths",
+ i.getChangeType()));
+ }
+ }
+
+ private static void processSchemaMetadataAspect(
+ BatchItem i, ValidationExceptionCollection exceptions) {
+ final SchemaMetadata schemaMetadata = i.getAspect(SchemaMetadata.class);
+ final long uniquePaths =
+ validateAndCount(
+ i, schemaMetadata.getFields().stream().map(SchemaField::getFieldPath), exceptions);
+
+ if (uniquePaths != schemaMetadata.getFields().size()) {
+ exceptions.addException(
+ i,
+ String.format(
+ "Cannot perform %s action on proposal. SchemaMetadata aspect has duplicated field paths",
+ i.getChangeType()));
+ }
+ }
+
+ private static long validateAndCount(
+ BatchItem i, Stream fieldPaths, ValidationExceptionCollection exceptions) {
+ return fieldPaths
+ .distinct()
+ // inspect the stream of fieldPath validation errors since we're already iterating
+ .peek(
+ fieldPath ->
+ validateFieldPath(fieldPath)
+ .ifPresent(message -> exceptions.addException(i, message)))
+ .count();
+ }
+
+ private static Optional validateFieldPath(String fieldPath) {
+ if (fieldPath == null || fieldPath.isEmpty()) {
+ return Optional.of("SchemaMetadata aspect has empty field path.");
+ }
+ return Optional.empty();
+ }
+}
diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/FieldPathValidatorTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/FieldPathValidatorTest.java
new file mode 100644
index 0000000000000..bd5912764edce
--- /dev/null
+++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/FieldPathValidatorTest.java
@@ -0,0 +1,233 @@
+package com.linkedin.metadata.aspect.validators;
+
+import static com.linkedin.metadata.Constants.*;
+import static org.mockito.Mockito.*;
+import static org.testng.Assert.*;
+
+import com.linkedin.common.urn.DatasetUrn;
+import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.events.metadata.ChangeType;
+import com.linkedin.metadata.aspect.AspectRetriever;
+import com.linkedin.metadata.aspect.GraphRetriever;
+import com.linkedin.metadata.aspect.RetrieverContext;
+import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig;
+import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException;
+import com.linkedin.metadata.aspect.validation.CreateIfNotExistsValidator;
+import com.linkedin.metadata.aspect.validation.FieldPathValidator;
+import com.linkedin.metadata.models.registry.EntityRegistry;
+import com.linkedin.schema.EditableSchemaFieldInfo;
+import com.linkedin.schema.EditableSchemaFieldInfoArray;
+import com.linkedin.schema.EditableSchemaMetadata;
+import com.linkedin.schema.SchemaField;
+import com.linkedin.schema.SchemaFieldArray;
+import com.linkedin.schema.SchemaFieldDataType;
+import com.linkedin.schema.SchemaMetadata;
+import com.linkedin.schema.StringType;
+import com.linkedin.test.metadata.aspect.TestEntityRegistry;
+import com.linkedin.test.metadata.aspect.batch.TestMCP;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+import javax.annotation.Nullable;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.Test;
+
+public class FieldPathValidatorTest {
+
+ private static final AspectPluginConfig validatorConfig =
+ AspectPluginConfig.builder()
+ .supportedOperations(
+ Arrays.stream(ChangeType.values())
+ .map(Objects::toString)
+ .collect(Collectors.toList()))
+ .className(CreateIfNotExistsValidator.class.getName())
+ .supportedEntityAspectNames(List.of(AspectPluginConfig.EntityAspectName.ALL))
+ .enabled(true)
+ .build();
+ private EntityRegistry entityRegistry;
+ private RetrieverContext mockRetrieverContext;
+ private static final DatasetUrn TEST_DATASET_URN;
+ private final FieldPathValidator test = new FieldPathValidator().setConfig(validatorConfig);
+
+ static {
+ try {
+ TEST_DATASET_URN =
+ DatasetUrn.createFromUrn(
+ UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,test,PROD)"));
+ } catch (URISyntaxException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @BeforeTest
+ public void init() {
+ entityRegistry = new TestEntityRegistry();
+ AspectRetriever mockAspectRetriever = mock(AspectRetriever.class);
+ when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry);
+ GraphRetriever mockGraphRetriever = mock(GraphRetriever.class);
+ mockRetrieverContext = mock(RetrieverContext.class);
+ when(mockRetrieverContext.getAspectRetriever()).thenReturn(mockAspectRetriever);
+ when(mockRetrieverContext.getGraphRetriever()).thenReturn(mockGraphRetriever);
+ }
+
+ @Test
+ public void testValidateNonDuplicatedSchemaFieldPath() {
+ final SchemaMetadata schema = getMockSchemaMetadataAspect(false);
+ assertEquals(
+ test.validateProposed(
+ Set.of(
+ TestMCP.builder()
+ .changeType(ChangeType.UPSERT)
+ .urn(TEST_DATASET_URN)
+ .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType()))
+ .aspectSpec(
+ entityRegistry
+ .getEntitySpec(TEST_DATASET_URN.getEntityType())
+ .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME))
+ .recordTemplate(schema)
+ .build()),
+ mockRetrieverContext)
+ .count(),
+ 0);
+ }
+
+ @Test
+ public void testValidateDuplicatedSchemaFieldPath() {
+ final SchemaMetadata schema = getMockSchemaMetadataAspect(true);
+
+ assertEquals(
+ test.validateProposed(
+ Set.of(
+ TestMCP.builder()
+ .changeType(ChangeType.UPSERT)
+ .urn(TEST_DATASET_URN)
+ .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType()))
+ .aspectSpec(
+ entityRegistry
+ .getEntitySpec(TEST_DATASET_URN.getEntityType())
+ .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME))
+ .recordTemplate(schema)
+ .build()),
+ mockRetrieverContext)
+ .count(),
+ 1);
+ }
+
+ @Test
+ public void testValidateNonDuplicatedEditableSchemaFieldPath() {
+ final EditableSchemaMetadata schema = getMockEditableSchemaMetadataAspect(false);
+ assertEquals(
+ test.validateProposed(
+ Set.of(
+ TestMCP.builder()
+ .changeType(ChangeType.UPSERT)
+ .urn(TEST_DATASET_URN)
+ .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType()))
+ .aspectSpec(
+ entityRegistry
+ .getEntitySpec(TEST_DATASET_URN.getEntityType())
+ .getAspectSpec(EDITABLE_SCHEMA_METADATA_ASPECT_NAME))
+ .recordTemplate(schema)
+ .build()),
+ mockRetrieverContext)
+ .count(),
+ 0);
+ }
+
+ @Test
+ public void testValidateDuplicatedEditableSchemaFieldPath() {
+ final EditableSchemaMetadata schema = getMockEditableSchemaMetadataAspect(true);
+
+ assertEquals(
+ test.validateProposed(
+ Set.of(
+ TestMCP.builder()
+ .changeType(ChangeType.UPSERT)
+ .urn(TEST_DATASET_URN)
+ .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType()))
+ .aspectSpec(
+ entityRegistry
+ .getEntitySpec(TEST_DATASET_URN.getEntityType())
+ .getAspectSpec(EDITABLE_SCHEMA_METADATA_ASPECT_NAME))
+ .recordTemplate(schema)
+ .build()),
+ mockRetrieverContext)
+ .count(),
+ 1);
+ }
+
+ @Test
+ public void testEmptySchemaFieldPath() {
+ final SchemaMetadata schema = getMockSchemaMetadataAspect(false, "");
+ TestMCP testItem =
+ TestMCP.builder()
+ .changeType(ChangeType.UPSERT)
+ .urn(TEST_DATASET_URN)
+ .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType()))
+ .aspectSpec(
+ entityRegistry
+ .getEntitySpec(TEST_DATASET_URN.getEntityType())
+ .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME))
+ .recordTemplate(schema)
+ .build();
+
+ Set exceptions =
+ test.validateProposed(Set.of(testItem), mockRetrieverContext).collect(Collectors.toSet());
+
+ assertEquals(
+ exceptions,
+ Set.of(
+ AspectValidationException.forItem(
+ testItem, "SchemaMetadata aspect has empty field path.")));
+ }
+
+ private static SchemaMetadata getMockSchemaMetadataAspect(boolean duplicateFields) {
+ return getMockSchemaMetadataAspect(duplicateFields, null);
+ }
+
+ private static SchemaMetadata getMockSchemaMetadataAspect(
+ boolean duplicateFields, @Nullable String fieldPath) {
+ List fields = new ArrayList<>();
+ fields.add(
+ new SchemaField()
+ .setType(
+ new SchemaFieldDataType()
+ .setType(SchemaFieldDataType.Type.create(new StringType())))
+ .setNullable(false)
+ .setNativeDataType("string")
+ .setFieldPath(fieldPath == null ? "test" : fieldPath));
+
+ if (duplicateFields) {
+ fields.add(
+ new SchemaField()
+ .setType(
+ new SchemaFieldDataType()
+ .setType(SchemaFieldDataType.Type.create(new StringType())))
+ .setNullable(false)
+ .setNativeDataType("string")
+ .setFieldPath(fieldPath == null ? "test" : fieldPath));
+ }
+
+ return new SchemaMetadata()
+ .setPlatform(TEST_DATASET_URN.getPlatformEntity())
+ .setFields(new SchemaFieldArray(fields));
+ }
+
+ private static EditableSchemaMetadata getMockEditableSchemaMetadataAspect(
+ boolean duplicateFields) {
+
+ List fields = new ArrayList<>();
+ fields.add(new EditableSchemaFieldInfo().setFieldPath("test"));
+
+ if (duplicateFields) {
+ fields.add(new EditableSchemaFieldInfo().setFieldPath("test"));
+ }
+
+ return new EditableSchemaMetadata()
+ .setEditableSchemaFieldInfo(new EditableSchemaFieldInfoArray(fields));
+ }
+}
diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java
index 6988dd4b3db3e..9cc9e7eac58ac 100644
--- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java
+++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java
@@ -319,6 +319,13 @@ public class Constants {
public static final String EXECUTION_REQUEST_INPUT_ASPECT_NAME = "dataHubExecutionRequestInput";
public static final String EXECUTION_REQUEST_SIGNAL_ASPECT_NAME = "dataHubExecutionRequestSignal";
public static final String EXECUTION_REQUEST_RESULT_ASPECT_NAME = "dataHubExecutionRequestResult";
+ public static final String EXECUTION_REQUEST_STATUS_RUNNING = "RUNNING";
+ public static final String EXECUTION_REQUEST_STATUS_FAILURE = "FAILURE";
+ public static final String EXECUTION_REQUEST_STATUS_SUCCESS = "SUCCESS";
+ public static final String EXECUTION_REQUEST_STATUS_TIMEOUT = "TIMEOUT";
+ public static final String EXECUTION_REQUEST_STATUS_CANCELLED = "CANCELLED";
+ public static final String EXECUTION_REQUEST_STATUS_ABORTED = "ABORTED";
+ public static final String EXECUTION_REQUEST_STATUS_DUPLICATE = "DUPLICATE";
// DataHub Access Token
public static final String ACCESS_TOKEN_KEY_ASPECT_NAME = "dataHubAccessTokenKey";
diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py
index 98629ba030695..2b2f992249f1e 100644
--- a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py
@@ -1,6 +1,7 @@
import datetime
import logging
-import uuid
+import random
+import string
from typing import Any, Dict, List, Optional
from pydantic import Field, validator
@@ -71,6 +72,15 @@ class FlagsConfig(ConfigModel):
)
+def _generate_run_id(source_type: Optional[str] = None) -> str:
+ current_time = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
+ random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=6))
+
+ if source_type is None:
+ source_type = "ingestion"
+ return f"{source_type}-{current_time}-{random_suffix}"
+
+
class PipelineConfig(ConfigModel):
source: SourceConfig
sink: Optional[DynamicTypedConfig] = None
@@ -91,12 +101,11 @@ def run_id_should_be_semantic(
cls, v: Optional[str], values: Dict[str, Any], **kwargs: Any
) -> str:
if v == DEFAULT_RUN_ID:
+ source_type = None
if "source" in values and hasattr(values["source"], "type"):
source_type = values["source"].type
- current_time = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
- return f"{source_type}-{current_time}"
- return str(uuid.uuid1()) # default run_id if we cannot infer a source type
+ return _generate_run_id(source_type)
else:
assert v is not None
return v
diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py
index 61b222f8d2dd5..155773f9898b4 100644
--- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py
+++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py
@@ -27,6 +27,8 @@
r"root\[\d+\]\['aspect'\]\['json'\]\['lastUpdatedTimestamp'\]",
r"root\[\d+\]\['aspect'\]\['json'\]\['created'\]",
r"root\[\d+\]\['aspect'\]\['json'\]\['lastModified'\]",
+ r"root\[\d+\].*?\['systemMetadata'\]\['runId'\]",
+ r"root\[\d+\].*?\['systemMetadata'\]\['lastRunId'\]",
]
@@ -82,6 +84,8 @@ def assert_metadata_files_equal(
json_path = f"root[{i}]['aspect']['json'][{j}]['value']"
ignore_paths = (*ignore_paths, re.escape(json_path))
+ ignore_paths = (*ignore_paths, *default_exclude_paths)
+
diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order)
if diff and update_golden:
if isinstance(diff, MCPDiff) and diff.is_delta_valid:
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidator.java
new file mode 100644
index 0000000000000..b77d3b48d5bd5
--- /dev/null
+++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidator.java
@@ -0,0 +1,70 @@
+package com.linkedin.metadata.aspect.validation;
+
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_ABORTED;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_CANCELLED;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_DUPLICATE;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_SUCCESS;
+
+import com.linkedin.execution.ExecutionRequestResult;
+import com.linkedin.metadata.aspect.RetrieverContext;
+import com.linkedin.metadata.aspect.batch.BatchItem;
+import com.linkedin.metadata.aspect.batch.ChangeMCP;
+import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig;
+import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator;
+import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException;
+import java.util.Collection;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Stream;
+import javax.annotation.Nonnull;
+import lombok.Getter;
+import lombok.Setter;
+import lombok.experimental.Accessors;
+import lombok.extern.slf4j.Slf4j;
+
+/** A Validator for StructuredProperties Aspect that is attached to entities like Datasets, etc. */
+@Setter
+@Getter
+@Slf4j
+@Accessors(chain = true)
+public class ExecutionRequestResultValidator extends AspectPayloadValidator {
+ private static final Set IMMUTABLE_STATUS =
+ Set.of(
+ EXECUTION_REQUEST_STATUS_ABORTED,
+ EXECUTION_REQUEST_STATUS_CANCELLED,
+ EXECUTION_REQUEST_STATUS_SUCCESS,
+ EXECUTION_REQUEST_STATUS_DUPLICATE);
+
+ @Nonnull private AspectPluginConfig config;
+
+ @Override
+ protected Stream validateProposedAspects(
+ @Nonnull Collection extends BatchItem> mcpItems,
+ @Nonnull RetrieverContext retrieverContext) {
+ return Stream.of();
+ }
+
+ @Override
+ protected Stream validatePreCommitAspects(
+ @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) {
+ return changeMCPs.stream()
+ .filter(item -> item.getPreviousRecordTemplate() != null)
+ .map(
+ item -> {
+ ExecutionRequestResult existingResult =
+ item.getPreviousAspect(ExecutionRequestResult.class);
+
+ if (IMMUTABLE_STATUS.contains(existingResult.getStatus())) {
+ ExecutionRequestResult currentResult = item.getAspect(ExecutionRequestResult.class);
+ return AspectValidationException.forItem(
+ item,
+ String.format(
+ "Invalid update to immutable state for aspect dataHubExecutionRequestResult. Execution urn: %s previous status: %s. Denied status update: %s",
+ item.getUrn(), existingResult.getStatus(), currentResult.getStatus()));
+ }
+
+ return null;
+ })
+ .filter(Objects::nonNull);
+ }
+}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java
index f09a81c0c8b89..2d7db075e676f 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java
@@ -370,7 +370,7 @@ public AutoCompleteResult autoComplete(
IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention();
AutocompleteRequestHandler builder =
AutocompleteRequestHandler.getBuilder(
- entitySpec, customSearchConfiguration, queryFilterRewriteChain);
+ entitySpec, customSearchConfiguration, queryFilterRewriteChain, searchConfiguration);
SearchRequest req =
builder.getSearchRequest(
opContext,
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java
index 294efb069a904..45359285b4a04 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java
@@ -1,6 +1,5 @@
package com.linkedin.metadata.search.elasticsearch.query.request;
-import static com.linkedin.metadata.models.SearchableFieldSpecExtractor.PRIMARY_URN_SEARCH_PROPERTIES;
import static com.linkedin.metadata.search.utils.ESAccessControlUtil.restrictUrn;
import static com.linkedin.metadata.search.utils.ESUtils.applyDefaultSearchFilters;
@@ -8,6 +7,7 @@
import com.google.common.collect.ImmutableList;
import com.linkedin.common.urn.Urn;
import com.linkedin.data.template.StringArray;
+import com.linkedin.metadata.config.search.SearchConfiguration;
import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration;
import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration;
import com.linkedin.metadata.config.search.custom.QueryConfiguration;
@@ -35,6 +35,7 @@
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.tuple.Pair;
import org.opensearch.action.search.SearchRequest;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.index.query.*;
@@ -46,7 +47,7 @@
@Slf4j
public class AutocompleteRequestHandler {
- private final List _defaultAutocompleteFields;
+ private final List _defaultAutocompleteFields;
private final Map> searchableFieldTypes;
private static final Map
@@ -56,11 +57,13 @@ public class AutocompleteRequestHandler {
private final EntitySpec entitySpec;
private final QueryFilterRewriteChain queryFilterRewriteChain;
+ private final SearchConfiguration searchConfiguration;
public AutocompleteRequestHandler(
@Nonnull EntitySpec entitySpec,
@Nullable CustomSearchConfiguration customSearchConfiguration,
- @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) {
+ @Nonnull QueryFilterRewriteChain queryFilterRewriteChain,
+ @Nonnull SearchConfiguration searchConfiguration) {
this.entitySpec = entitySpec;
List fieldSpecs = entitySpec.getSearchableFieldSpecs();
this.customizedQueryHandler = CustomizedQueryHandler.builder(customSearchConfiguration).build();
@@ -69,8 +72,12 @@ public AutocompleteRequestHandler(
fieldSpecs.stream()
.map(SearchableFieldSpec::getSearchableAnnotation)
.filter(SearchableAnnotation::isEnableAutocomplete)
- .map(SearchableAnnotation::getFieldName),
- Stream.of("urn"))
+ .map(
+ searchableAnnotation ->
+ Pair.of(
+ searchableAnnotation.getFieldName(),
+ Double.toString(searchableAnnotation.getBoostScore()))),
+ Stream.of(Pair.of("urn", "1.0")))
.collect(Collectors.toList());
searchableFieldTypes =
fieldSpecs.stream()
@@ -87,17 +94,22 @@ public AutocompleteRequestHandler(
return set1;
}));
this.queryFilterRewriteChain = queryFilterRewriteChain;
+ this.searchConfiguration = searchConfiguration;
}
public static AutocompleteRequestHandler getBuilder(
@Nonnull EntitySpec entitySpec,
@Nullable CustomSearchConfiguration customSearchConfiguration,
- @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) {
+ @Nonnull QueryFilterRewriteChain queryFilterRewriteChain,
+ @Nonnull SearchConfiguration searchConfiguration) {
return AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME.computeIfAbsent(
entitySpec,
k ->
new AutocompleteRequestHandler(
- entitySpec, customSearchConfiguration, queryFilterRewriteChain));
+ entitySpec,
+ customSearchConfiguration,
+ queryFilterRewriteChain,
+ searchConfiguration));
}
public SearchRequest getSearchRequest(
@@ -169,7 +181,7 @@ private BoolQueryBuilder getQuery(
public BoolQueryBuilder getQuery(
@Nonnull ObjectMapper objectMapper,
@Nullable AutocompleteConfiguration customAutocompleteConfig,
- List autocompleteFields,
+ List autocompleteFields,
@Nonnull String query) {
BoolQueryBuilder finalQuery =
@@ -189,7 +201,7 @@ public BoolQueryBuilder getQuery(
private Optional getAutocompleteQuery(
@Nullable AutocompleteConfiguration customConfig,
- List autocompleteFields,
+ List autocompleteFields,
@Nonnull String query) {
Optional result = Optional.empty();
@@ -200,33 +212,39 @@ private Optional getAutocompleteQuery(
return result;
}
- private static BoolQueryBuilder defaultQuery(
- List autocompleteFields, @Nonnull String query) {
+ private BoolQueryBuilder defaultQuery(List autocompleteFields, @Nonnull String query) {
BoolQueryBuilder finalQuery = QueryBuilders.boolQuery().minimumShouldMatch(1);
// Search for exact matches with higher boost and ngram matches
- MultiMatchQueryBuilder autocompleteQueryBuilder =
+ MultiMatchQueryBuilder multiMatchQueryBuilder =
QueryBuilders.multiMatchQuery(query).type(MultiMatchQueryBuilder.Type.BOOL_PREFIX);
- final float urnBoost =
- Float.parseFloat((String) PRIMARY_URN_SEARCH_PROPERTIES.get("boostScore"));
autocompleteFields.forEach(
- fieldName -> {
- if ("urn".equals(fieldName)) {
- autocompleteQueryBuilder.field(fieldName + ".ngram", urnBoost);
- autocompleteQueryBuilder.field(fieldName + ".ngram._2gram", urnBoost);
- autocompleteQueryBuilder.field(fieldName + ".ngram._3gram", urnBoost);
- autocompleteQueryBuilder.field(fieldName + ".ngram._4gram", urnBoost);
- } else {
- autocompleteQueryBuilder.field(fieldName + ".ngram");
- autocompleteQueryBuilder.field(fieldName + ".ngram._2gram");
- autocompleteQueryBuilder.field(fieldName + ".ngram._3gram");
- autocompleteQueryBuilder.field(fieldName + ".ngram._4gram");
+ pair -> {
+ final String fieldName = (String) pair.getLeft();
+ final float boostScore = Float.parseFloat((String) pair.getRight());
+ multiMatchQueryBuilder.field(fieldName + ".ngram");
+ multiMatchQueryBuilder.field(fieldName + ".ngram._2gram");
+ multiMatchQueryBuilder.field(fieldName + ".ngram._3gram");
+ multiMatchQueryBuilder.field(fieldName + ".ngram._4gram");
+ multiMatchQueryBuilder.field(fieldName + ".delimited");
+ if (!fieldName.equalsIgnoreCase("urn")) {
+ multiMatchQueryBuilder.field(fieldName + ".ngram", boostScore);
+ multiMatchQueryBuilder.field(
+ fieldName + ".ngram._2gram",
+ boostScore * (searchConfiguration.getWordGram().getTwoGramFactor()));
+ multiMatchQueryBuilder.field(
+ fieldName + ".ngram._3gram",
+ boostScore * (searchConfiguration.getWordGram().getThreeGramFactor()));
+ multiMatchQueryBuilder.field(
+ fieldName + ".ngram._4gram",
+ boostScore * (searchConfiguration.getWordGram().getFourGramFactor()));
+ finalQuery.should(
+ QueryBuilders.matchQuery(fieldName + ".keyword", query).boost(boostScore));
}
- autocompleteQueryBuilder.field(fieldName + ".delimited");
finalQuery.should(QueryBuilders.matchPhrasePrefixQuery(fieldName + ".delimited", query));
});
- finalQuery.should(autocompleteQueryBuilder);
+ finalQuery.should(multiMatchQueryBuilder);
return finalQuery;
}
@@ -241,12 +259,17 @@ private HighlightBuilder getHighlights(@Nullable String field) {
// Check for each field name and any subfields
getAutocompleteFields(field)
.forEach(
- fieldName ->
- highlightBuilder
- .field(fieldName)
- .field(fieldName + ".*")
- .field(fieldName + ".ngram")
- .field(fieldName + ".delimited"));
+ pair -> {
+ final String fieldName = (String) pair.getLeft();
+ highlightBuilder
+ .field(fieldName)
+ .field(fieldName + ".*")
+ .field(fieldName + ".ngram")
+ .field(fieldName + ".delimited");
+ if (!fieldName.equalsIgnoreCase("urn")) {
+ highlightBuilder.field(fieldName + ".keyword");
+ }
+ });
// set field match req false for ngram
highlightBuilder.fields().stream()
@@ -256,9 +279,9 @@ private HighlightBuilder getHighlights(@Nullable String field) {
return highlightBuilder;
}
- private List getAutocompleteFields(@Nullable String field) {
- if (field != null && !field.isEmpty()) {
- return ImmutableList.of(field);
+ private List getAutocompleteFields(@Nullable String field) {
+ if (field != null && !field.isEmpty() && !field.equalsIgnoreCase("urn")) {
+ return ImmutableList.of(Pair.of(field, "10.0"));
}
return _defaultAutocompleteFields;
}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidatorTest.java
new file mode 100644
index 0000000000000..f46772ca7b350
--- /dev/null
+++ b/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidatorTest.java
@@ -0,0 +1,166 @@
+package com.linkedin.metadata.aspect.validation;
+
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_ENTITY_NAME;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_RESULT_ASPECT_NAME;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_ABORTED;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_CANCELLED;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_DUPLICATE;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_FAILURE;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_RUNNING;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_SUCCESS;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_TIMEOUT;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import com.linkedin.common.urn.Urn;
+import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.events.metadata.ChangeType;
+import com.linkedin.execution.ExecutionRequestResult;
+import com.linkedin.metadata.aspect.RetrieverContext;
+import com.linkedin.metadata.aspect.SystemAspect;
+import com.linkedin.metadata.aspect.batch.ChangeMCP;
+import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig;
+import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException;
+import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl;
+import com.linkedin.metadata.utils.AuditStampUtils;
+import io.datahubproject.metadata.context.OperationContext;
+import io.datahubproject.test.metadata.context.TestOperationContexts;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.testng.annotations.Test;
+
+public class ExecutionRequestResultValidatorTest {
+ private static final OperationContext TEST_CONTEXT =
+ TestOperationContexts.systemContextNoSearchAuthorization();
+ private static final AspectPluginConfig TEST_PLUGIN_CONFIG =
+ AspectPluginConfig.builder()
+ .className(ExecutionRequestResultValidator.class.getName())
+ .enabled(true)
+ .supportedOperations(List.of("UPSERT"))
+ .supportedEntityAspectNames(
+ List.of(
+ AspectPluginConfig.EntityAspectName.builder()
+ .entityName(EXECUTION_REQUEST_ENTITY_NAME)
+ .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME)
+ .build()))
+ .build();
+ private static final Urn TEST_URN = UrnUtils.getUrn("urn:li:dataHubExecutionRequest:xyz");
+
+ @Test
+ public void testAllowed() {
+ ExecutionRequestResultValidator test = new ExecutionRequestResultValidator();
+ test.setConfig(TEST_PLUGIN_CONFIG);
+
+ Set allowedUpdateStates =
+ Set.of(
+ EXECUTION_REQUEST_STATUS_RUNNING,
+ EXECUTION_REQUEST_STATUS_FAILURE,
+ EXECUTION_REQUEST_STATUS_TIMEOUT);
+ Set destinationStates = new HashSet<>(allowedUpdateStates);
+ destinationStates.addAll(
+ Set.of(
+ EXECUTION_REQUEST_STATUS_ABORTED,
+ EXECUTION_REQUEST_STATUS_CANCELLED,
+ EXECUTION_REQUEST_STATUS_SUCCESS,
+ EXECUTION_REQUEST_STATUS_DUPLICATE));
+
+ List testItems =
+ new ArrayList<>(
+ // Tests with previous state
+ allowedUpdateStates.stream()
+ .flatMap(
+ prevState ->
+ destinationStates.stream()
+ .map(
+ destState -> {
+ SystemAspect prevData = mock(SystemAspect.class);
+ when(prevData.getRecordTemplate())
+ .thenReturn(
+ new ExecutionRequestResult().setStatus(prevState));
+ return ChangeItemImpl.builder()
+ .changeType(ChangeType.UPSERT)
+ .urn(TEST_URN)
+ .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME)
+ .recordTemplate(
+ new ExecutionRequestResult().setStatus(destState))
+ .previousSystemAspect(prevData)
+ .auditStamp(AuditStampUtils.createDefaultAuditStamp())
+ .build(TEST_CONTEXT.getAspectRetriever());
+ }))
+ .toList());
+ // Tests with no previous
+ testItems.addAll(
+ destinationStates.stream()
+ .map(
+ destState ->
+ ChangeItemImpl.builder()
+ .changeType(ChangeType.UPSERT)
+ .urn(TEST_URN)
+ .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME)
+ .recordTemplate(new ExecutionRequestResult().setStatus(destState))
+ .auditStamp(AuditStampUtils.createDefaultAuditStamp())
+ .build(TEST_CONTEXT.getAspectRetriever()))
+ .toList());
+
+ List result =
+ test.validatePreCommitAspects(testItems, mock(RetrieverContext.class)).toList();
+
+ assertTrue(result.isEmpty(), "Did not expect any validation errors.");
+ }
+
+ @Test
+ public void testDenied() {
+ ExecutionRequestResultValidator test = new ExecutionRequestResultValidator();
+ test.setConfig(TEST_PLUGIN_CONFIG);
+
+ Set deniedUpdateStates =
+ Set.of(
+ EXECUTION_REQUEST_STATUS_ABORTED,
+ EXECUTION_REQUEST_STATUS_CANCELLED,
+ EXECUTION_REQUEST_STATUS_SUCCESS,
+ EXECUTION_REQUEST_STATUS_DUPLICATE);
+ Set destinationStates = new HashSet<>(deniedUpdateStates);
+ destinationStates.addAll(
+ Set.of(
+ EXECUTION_REQUEST_STATUS_RUNNING,
+ EXECUTION_REQUEST_STATUS_FAILURE,
+ EXECUTION_REQUEST_STATUS_TIMEOUT));
+
+ List testItems =
+ new ArrayList<>(
+ // Tests with previous state
+ deniedUpdateStates.stream()
+ .flatMap(
+ prevState ->
+ destinationStates.stream()
+ .map(
+ destState -> {
+ SystemAspect prevData = mock(SystemAspect.class);
+ when(prevData.getRecordTemplate())
+ .thenReturn(
+ new ExecutionRequestResult().setStatus(prevState));
+ return ChangeItemImpl.builder()
+ .changeType(ChangeType.UPSERT)
+ .urn(TEST_URN)
+ .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME)
+ .recordTemplate(
+ new ExecutionRequestResult().setStatus(destState))
+ .previousSystemAspect(prevData)
+ .auditStamp(AuditStampUtils.createDefaultAuditStamp())
+ .build(TEST_CONTEXT.getAspectRetriever());
+ }))
+ .toList());
+
+ List result =
+ test.validatePreCommitAspects(testItems, mock(RetrieverContext.class)).toList();
+
+ assertEquals(
+ result.size(),
+ deniedUpdateStates.size() * destinationStates.size(),
+ "Expected ALL items to be denied.");
+ }
+}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java
index bc3c892e07b1b..504eb5f5fc13d 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java
@@ -283,7 +283,7 @@ public void testFixtureInitialization() {
Map.of(
"dataset", 13,
"chart", 0,
- "container", 1,
+ "container", 2,
"dashboard", 0,
"tag", 0,
"mlmodel", 0);
@@ -903,6 +903,26 @@ public void testContainerAutoComplete() {
});
}
+ @Test
+ public void testContainerAutoComplete_with_exactMatch_onTop() {
+ List.of("container")
+ .forEach(
+ query -> {
+ try {
+ AutoCompleteResults result =
+ autocomplete(
+ getOperationContext(), new ContainerType(getEntityClient()), query);
+ assertTrue(
+ result.getSuggestions().get(0).equals("container"),
+ String.format(
+ "Expected query:`%s` on top of suggestions, found %s",
+ query, result.getSuggestions().get(0)));
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
@Test
public void testGroupAutoComplete() {
List.of("T", "Te", "Tes", "Test ", "Test G", "Test Gro", "Test Group ")
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java
index 572d79ebf2f0c..c5205906e9d37 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java
@@ -5,6 +5,10 @@
import static org.testng.Assert.assertTrue;
import com.linkedin.metadata.TestEntitySpecBuilder;
+import com.linkedin.metadata.config.search.ExactMatchConfiguration;
+import com.linkedin.metadata.config.search.PartialConfiguration;
+import com.linkedin.metadata.config.search.SearchConfiguration;
+import com.linkedin.metadata.config.search.WordGramConfiguration;
import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration;
import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration;
import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration;
@@ -32,14 +36,44 @@
import org.testng.annotations.Test;
public class AutocompleteRequestHandlerTest {
- private AutocompleteRequestHandler handler =
- AutocompleteRequestHandler.getBuilder(
- TestEntitySpecBuilder.getSpec(),
- CustomSearchConfiguration.builder().build(),
- QueryFilterRewriteChain.EMPTY);
+ private static SearchConfiguration testQueryConfig;
+ private static AutocompleteRequestHandler handler;
private OperationContext mockOpContext =
TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class));
+ static {
+ testQueryConfig = new SearchConfiguration();
+ testQueryConfig.setMaxTermBucketSize(20);
+
+ ExactMatchConfiguration exactMatchConfiguration = new ExactMatchConfiguration();
+ exactMatchConfiguration.setExclusive(false);
+ exactMatchConfiguration.setExactFactor(10.0f);
+ exactMatchConfiguration.setWithPrefix(true);
+ exactMatchConfiguration.setPrefixFactor(6.0f);
+ exactMatchConfiguration.setCaseSensitivityFactor(0.7f);
+ exactMatchConfiguration.setEnableStructured(true);
+
+ WordGramConfiguration wordGramConfiguration = new WordGramConfiguration();
+ wordGramConfiguration.setTwoGramFactor(1.2f);
+ wordGramConfiguration.setThreeGramFactor(1.5f);
+ wordGramConfiguration.setFourGramFactor(1.8f);
+
+ PartialConfiguration partialConfiguration = new PartialConfiguration();
+ partialConfiguration.setFactor(0.4f);
+ partialConfiguration.setUrnFactor(0.7f);
+
+ testQueryConfig.setExactMatch(exactMatchConfiguration);
+ testQueryConfig.setWordGram(wordGramConfiguration);
+ testQueryConfig.setPartial(partialConfiguration);
+
+ handler =
+ AutocompleteRequestHandler.getBuilder(
+ TestEntitySpecBuilder.getSpec(),
+ CustomSearchConfiguration.builder().build(),
+ QueryFilterRewriteChain.EMPTY,
+ testQueryConfig);
+ }
+
private static final QueryConfiguration TEST_QUERY_CONFIG =
QueryConfiguration.builder()
.queryRegex(".*")
@@ -88,9 +122,12 @@ public void testDefaultAutocompleteRequest() {
BoolQueryBuilder wrapper =
(BoolQueryBuilder) ((FunctionScoreQueryBuilder) sourceBuilder.query()).query();
BoolQueryBuilder query = (BoolQueryBuilder) extractNestedQuery(wrapper);
- assertEquals(query.should().size(), 3);
+ assertEquals(query.should().size(), 4);
- MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(2);
+ MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) query.should().get(0);
+ assertEquals("keyPart1.keyword", matchQueryBuilder.fieldName());
+
+ MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(3);
Map queryFields = autocompleteQuery.fields();
assertTrue(queryFields.containsKey("keyPart1.ngram"));
assertTrue(queryFields.containsKey("keyPart1.ngram._2gram"));
@@ -99,7 +136,7 @@ public void testDefaultAutocompleteRequest() {
assertEquals(autocompleteQuery.type(), MultiMatchQueryBuilder.Type.BOOL_PREFIX);
MatchPhrasePrefixQueryBuilder prefixQuery =
- (MatchPhrasePrefixQueryBuilder) query.should().get(0);
+ (MatchPhrasePrefixQueryBuilder) query.should().get(1);
assertEquals("keyPart1.delimited", prefixQuery.fieldName());
assertEquals(wrapper.mustNot().size(), 1);
@@ -108,15 +145,16 @@ public void testDefaultAutocompleteRequest() {
assertEquals(removedFilter.value(), true);
HighlightBuilder highlightBuilder = sourceBuilder.highlighter();
List highlightedFields = highlightBuilder.fields();
- assertEquals(highlightedFields.size(), 8);
+ assertEquals(highlightedFields.size(), 9);
assertEquals(highlightedFields.get(0).name(), "keyPart1");
assertEquals(highlightedFields.get(1).name(), "keyPart1.*");
assertEquals(highlightedFields.get(2).name(), "keyPart1.ngram");
assertEquals(highlightedFields.get(3).name(), "keyPart1.delimited");
- assertEquals(highlightedFields.get(4).name(), "urn");
- assertEquals(highlightedFields.get(5).name(), "urn.*");
- assertEquals(highlightedFields.get(6).name(), "urn.ngram");
- assertEquals(highlightedFields.get(7).name(), "urn.delimited");
+ assertEquals(highlightedFields.get(4).name(), "keyPart1.keyword");
+ assertEquals(highlightedFields.get(5).name(), "urn");
+ assertEquals(highlightedFields.get(6).name(), "urn.*");
+ assertEquals(highlightedFields.get(7).name(), "urn.ngram");
+ assertEquals(highlightedFields.get(8).name(), "urn.delimited");
}
@Test
@@ -130,9 +168,12 @@ public void testAutocompleteRequestWithField() {
(BoolQueryBuilder) ((FunctionScoreQueryBuilder) sourceBuilder.query()).query();
assertEquals(wrapper.should().size(), 1);
BoolQueryBuilder query = (BoolQueryBuilder) extractNestedQuery(wrapper);
- assertEquals(query.should().size(), 2);
+ assertEquals(query.should().size(), 3);
- MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(1);
+ MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) query.should().get(0);
+ assertEquals("field.keyword", matchQueryBuilder.fieldName());
+
+ MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(2);
Map queryFields = autocompleteQuery.fields();
assertTrue(queryFields.containsKey("field.ngram"));
assertTrue(queryFields.containsKey("field.ngram._2gram"));
@@ -141,7 +182,7 @@ public void testAutocompleteRequestWithField() {
assertEquals(autocompleteQuery.type(), MultiMatchQueryBuilder.Type.BOOL_PREFIX);
MatchPhrasePrefixQueryBuilder prefixQuery =
- (MatchPhrasePrefixQueryBuilder) query.should().get(0);
+ (MatchPhrasePrefixQueryBuilder) query.should().get(1);
assertEquals("field.delimited", prefixQuery.fieldName());
MatchQueryBuilder removedFilter = (MatchQueryBuilder) wrapper.mustNot().get(0);
@@ -149,11 +190,12 @@ public void testAutocompleteRequestWithField() {
assertEquals(removedFilter.value(), true);
HighlightBuilder highlightBuilder = sourceBuilder.highlighter();
List highlightedFields = highlightBuilder.fields();
- assertEquals(highlightedFields.size(), 4);
+ assertEquals(highlightedFields.size(), 5);
assertEquals(highlightedFields.get(0).name(), "field");
assertEquals(highlightedFields.get(1).name(), "field.*");
assertEquals(highlightedFields.get(2).name(), "field.ngram");
assertEquals(highlightedFields.get(3).name(), "field.delimited");
+ assertEquals(highlightedFields.get(4).name(), "field.keyword");
}
@Test
@@ -174,7 +216,8 @@ public void testCustomConfigWithDefault() {
.build())
.build()))
.build(),
- QueryFilterRewriteChain.EMPTY);
+ QueryFilterRewriteChain.EMPTY,
+ testQueryConfig);
SearchRequest autocompleteRequest =
withoutDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10);
@@ -200,7 +243,8 @@ public void testCustomConfigWithDefault() {
.build())
.build()))
.build(),
- QueryFilterRewriteChain.EMPTY);
+ QueryFilterRewriteChain.EMPTY,
+ testQueryConfig);
autocompleteRequest = withDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10);
sourceBuilder = autocompleteRequest.source();
@@ -215,7 +259,7 @@ public void testCustomConfigWithDefault() {
BoolQueryBuilder defaultQuery =
(BoolQueryBuilder)
shouldQueries.stream().filter(qb -> qb instanceof BoolQueryBuilder).findFirst().get();
- assertEquals(defaultQuery.should().size(), 3);
+ assertEquals(defaultQuery.should().size(), 4);
// Custom
customQuery =
@@ -243,7 +287,8 @@ public void testCustomConfigWithInheritedQueryFunctionScores() {
.build())
.build()))
.build(),
- QueryFilterRewriteChain.EMPTY);
+ QueryFilterRewriteChain.EMPTY,
+ testQueryConfig);
SearchRequest autocompleteRequest =
withInherit.getSearchRequest(mockOpContext, "input", null, null, 10);
@@ -282,7 +327,8 @@ public void testCustomConfigWithInheritedQueryFunctionScores() {
.build())
.build()))
.build(),
- QueryFilterRewriteChain.EMPTY);
+ QueryFilterRewriteChain.EMPTY,
+ testQueryConfig);
autocompleteRequest =
noQueryCustomization.getSearchRequest(mockOpContext, "input", null, null, 10);
@@ -345,7 +391,8 @@ public void testCustomConfigWithFunctionScores() {
"deprecated", Map.of("value", false)))))))
.build()))
.build(),
- QueryFilterRewriteChain.EMPTY);
+ QueryFilterRewriteChain.EMPTY,
+ testQueryConfig);
SearchRequest autocompleteRequest =
explicitNoInherit.getSearchRequest(mockOpContext, "input", null, null, 10);
@@ -398,7 +445,8 @@ public void testCustomConfigWithFunctionScores() {
"deprecated", Map.of("value", false)))))))
.build()))
.build(),
- QueryFilterRewriteChain.EMPTY);
+ QueryFilterRewriteChain.EMPTY,
+ testQueryConfig);
autocompleteRequest = explicit.getSearchRequest(mockOpContext, "input", null, null, 10);
sourceBuilder = autocompleteRequest.source();
@@ -411,7 +459,7 @@ public void testCustomConfigWithFunctionScores() {
assertEquals(customQuery, QueryBuilders.matchAllQuery());
// standard query still present
- assertEquals(((BoolQueryBuilder) query.should().get(1)).should().size(), 3);
+ assertEquals(((BoolQueryBuilder) query.should().get(1)).should().size(), 4);
// custom functions included
assertEquals(wrapper.filterFunctionBuilders(), expectedCustomScoreFunctions);
diff --git a/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz b/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz
index 2fa49c810abfa..bd36747255f86 100644
Binary files a/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz and b/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz differ
diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml
index 21403191733bb..88b7996412755 100644
--- a/metadata-models/src/main/resources/entity-registry.yml
+++ b/metadata-models/src/main/resources/entity-registry.yml
@@ -674,6 +674,12 @@ plugins:
supportedEntityAspectNames:
- entityName: '*'
aspectName: '*'
+ - className: 'com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator'
+ enabled: true
+ spring:
+ enabled: true
+ packageScan:
+ - com.linkedin.gms.factory.plugins
mcpSideEffects:
- className: 'com.linkedin.metadata.structuredproperties.hooks.PropertyDefinitionDeleteSideEffect'
packageScan:
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java
index 4a2095685abe1..943b1c7184a60 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java
@@ -1,5 +1,8 @@
package com.linkedin.gms.factory.plugins;
+import static com.linkedin.metadata.Constants.EDITABLE_SCHEMA_METADATA_ASPECT_NAME;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_ENTITY_NAME;
+import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_RESULT_ASPECT_NAME;
import static com.linkedin.metadata.Constants.SCHEMA_METADATA_ASPECT_NAME;
import com.linkedin.metadata.Constants;
@@ -7,6 +10,9 @@
import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig;
import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect;
import com.linkedin.metadata.aspect.plugins.hooks.MutationHook;
+import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator;
+import com.linkedin.metadata.aspect.validation.ExecutionRequestResultValidator;
+import com.linkedin.metadata.aspect.validation.FieldPathValidator;
import com.linkedin.metadata.dataproducts.sideeffects.DataProductUnsetSideEffect;
import com.linkedin.metadata.schemafields.sideeffects.SchemaFieldSideEffect;
import com.linkedin.metadata.timeline.eventgenerator.EntityChangeEventGeneratorRegistry;
@@ -21,6 +27,7 @@
@Configuration
@Slf4j
public class SpringStandardPluginConfiguration {
+ private static final String ALL = "*";
@Value("${metadataChangeProposal.validation.ignoreUnknown}")
private boolean ignoreUnknownEnabled;
@@ -104,4 +111,43 @@ public MCPSideEffect dataProductUnsetSideEffect() {
log.info("Initialized {}", SchemaFieldSideEffect.class.getName());
return new DataProductUnsetSideEffect().setConfig(config);
}
+
+ @Bean
+ public AspectPayloadValidator fieldPathValidator() {
+ return new FieldPathValidator()
+ .setConfig(
+ AspectPluginConfig.builder()
+ .className(FieldPathValidator.class.getName())
+ .enabled(true)
+ .supportedOperations(
+ List.of("CREATE", "CREATE_ENTITY", "UPSERT", "UPDATE", "RESTATE"))
+ .supportedEntityAspectNames(
+ List.of(
+ AspectPluginConfig.EntityAspectName.builder()
+ .entityName(ALL)
+ .aspectName(SCHEMA_METADATA_ASPECT_NAME)
+ .build(),
+ AspectPluginConfig.EntityAspectName.builder()
+ .entityName(ALL)
+ .aspectName(EDITABLE_SCHEMA_METADATA_ASPECT_NAME)
+ .build()))
+ .build());
+ }
+
+ @Bean
+ public AspectPayloadValidator dataHubExecutionRequestResultValidator() {
+ return new ExecutionRequestResultValidator()
+ .setConfig(
+ AspectPluginConfig.builder()
+ .className(ExecutionRequestResultValidator.class.getName())
+ .enabled(true)
+ .supportedOperations(List.of("UPSERT", "UPDATE"))
+ .supportedEntityAspectNames(
+ List.of(
+ AspectPluginConfig.EntityAspectName.builder()
+ .entityName(EXECUTION_REQUEST_ENTITY_NAME)
+ .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME)
+ .build()))
+ .build());
+ }
}