diff --git a/.github/workflows/contributor-open-pr-comment.yml b/.github/workflows/contributor-open-pr-comment.yml new file mode 100644 index 0000000000000..2f700290ee0f2 --- /dev/null +++ b/.github/workflows/contributor-open-pr-comment.yml @@ -0,0 +1,42 @@ +name: PR Comment + +on: + pull_request: + types: [opened] + +permissions: + pull-requests: write + +jobs: + post-pr-opened-comment: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Get and Format Username (PR only) + if: github.event_name == 'pull_request' + run: | + formatted_username=$(echo "${{ github.event.pull_request.user.login }}" | tr '[:upper:]' '[:lower:]' | sed 's/ /-/g') + echo "FORMATTED_USERNAME=$formatted_username" >> $GITHUB_ENV + + - name: Create Comment (PR only) + if: github.event_name == 'pull_request' + uses: actions/github-script@v6 + with: + script: | + if (context.payload.pull_request) { + const prUser = process.env.FORMATTED_USERNAME; + const url = `https://contributors.datahubproject.io/${prUser}`; + const body = `Hello @${prUser} :smile: \n\n Thank you so much for opening a pull request!\n\n![Image](https://contributors.datahubproject.io/api/og?userId=${{ github.event.pull_request.user.login }})\nYou can check out your contributor card and see all your past stats [here](${url})!`; + + // Create a comment on the PR + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.payload.pull_request.number, + body: body + }); + } else { + console.log('Not a pull request event.'); + } diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index ef5770ccb167a..a1ae7cee1736f 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -760,14 +760,18 @@ jobs: needs: [setup, datahub_ingestion_base_slim_build] if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: + - name: Check out the repo + uses: acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" - name: Set up JDK 17 uses: actions/setup-java@v4 with: distribution: "zulu" java-version: 17 - uses: gradle/actions/setup-gradle@v3 - - name: Check out the repo - uses: acryldata/sane-checkout-action@v3 - name: Build codegen if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish =='true' }} run: ./gradlew :metadata-ingestion:codegen @@ -852,14 +856,18 @@ jobs: needs: [setup, datahub_ingestion_base_full_build] if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} steps: + - name: Check out the repo + uses: acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" - name: Set up JDK 17 uses: actions/setup-java@v4 with: distribution: "zulu" java-version: 17 - uses: gradle/actions/setup-gradle@v3 - - name: Check out the repo - uses: acryldata/sane-checkout-action@v3 - name: Build codegen if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }} run: ./gradlew :metadata-ingestion:codegen @@ -983,16 +991,16 @@ jobs: run: df -h . 
&& docker images - name: Check out the repo uses: acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" - name: Set up JDK 17 uses: actions/setup-java@v4 with: distribution: "zulu" java-version: 17 - uses: gradle/actions/setup-gradle@v3 - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - cache: "pip" - name: Login to DockerHub uses: docker/login-action@v3 if: ${{ needs.setup.outputs.docker-login == 'true' }} diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index c718febca398a..92dcb3d8ac289 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -26,6 +26,7 @@ concurrency: jobs: metadata-ingestion: runs-on: ubuntu-latest + timeout-minutes: 40 env: SPARK_VERSION: 3.3.2 DATAHUB_TELEMETRY_ENABLED: false diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index 7018b42949e89..5ee2223d71b03 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -57,17 +57,16 @@ jobs: - name: Disk Check run: df -h . && docker images - uses: acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" - name: Set up JDK 17 uses: actions/setup-java@v4 with: distribution: "zulu" java-version: 17 - uses: gradle/actions/setup-gradle@v3 - - uses: actions/setup-python@v5 - if: ${{ needs.setup.outputs.ingestion_change == 'true' }} - with: - python-version: "3.10" - cache: "pip" - name: Gradle build (and test) run: | ./gradlew :metadata-io:test diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index b0cf5bfa35eca..b016f9518ea6c 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -65,6 +65,14 @@ module.exports = { // isCloseable: false, // }, // }), + announcementBar: { + id: "announcement-2", + content: + '
NEW

Join us at Metadata & AI Summit, Oct. 29 & 30!

Register →
', + backgroundColor: "#111", + textColor: "#ffffff", + isCloseable: false, + }, colorMode: { // Only support light mode. defaultMode: 'light', diff --git a/docs-website/src/pages/index.js b/docs-website/src/pages/index.js index 1c36b81a2da95..e1c94780715d3 100644 --- a/docs-website/src/pages/index.js +++ b/docs-website/src/pages/index.js @@ -44,7 +44,7 @@ function Home() { return !siteConfig.customFields.isSaas ? ( {isTourModalVisible ? (
diff --git a/docs-website/src/styles/global.scss b/docs-website/src/styles/global.scss index e256c752b4a0b..96ca07d45d0c2 100644 --- a/docs-website/src/styles/global.scss +++ b/docs-website/src/styles/global.scss @@ -31,7 +31,7 @@ --ifm-navbar-item-padding-horizontal: 1rem; /* Announcement Bar */ - --docusaurus-announcement-bar-height: 60px !important; + --docusaurus-announcement-bar-height: 48px !important; /* Rule */ --ifm-hr-border-width: 1px 0 0 0; @@ -141,8 +141,9 @@ div[class^="announcementBar"] { } a { - color: var(--ifm-button-color); + color: #EFB300; text-decoration: none; + font-size: 1rem } } } diff --git a/docs/advanced/mcp-mcl.md b/docs/advanced/mcp-mcl.md index 9efb9b794954d..333891ba1a95d 100644 --- a/docs/advanced/mcp-mcl.md +++ b/docs/advanced/mcp-mcl.md @@ -14,6 +14,18 @@ To mitigate these downsides, we are committed to providing cross-language client Ultimately, we intend to realize a state in which the Entities and Aspect schemas can be altered without requiring generated code and without maintaining a single mega-model schema (looking at you, Snapshot.pdl). The intention is that changes to the metadata model become even easier than they are today. +### Synchronous Ingestion Architecture + +

+ +

+ +### Asynchronous Ingestion Architecture + +

+ +

+ ## Modeling A Metadata Change Proposal is defined (in PDL) as follows diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 00e020bd2a387..dbcc7da846703 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -24,6 +24,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - #11484 - Metadata service authentication enabled by default - #11484 - Rest API authorization enabled by default - #10472 - `SANDBOX` added as a FabricType. No rollbacks allowed once metadata with this fabric type is added without manual cleanups in databases. +- #11619 - schema field/column paths can no longer be empty strings +- #11619 - schema field/column paths can no longer be duplicated within the schema ### Potential Downtime diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/FieldPathValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/FieldPathValidator.java new file mode 100644 index 0000000000000..7c279254e1bc3 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/FieldPathValidator.java @@ -0,0 +1,116 @@ +package com.linkedin.metadata.aspect.validation; + +import static com.linkedin.metadata.Constants.*; + +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; +import com.linkedin.schema.EditableSchemaFieldInfo; +import com.linkedin.schema.EditableSchemaMetadata; +import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaMetadata; +import java.util.Collection; +import java.util.Optional; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; + +/** + * 1. Validates the Schema Field Path specification, specifically that all field IDs must be unique + * across all fields within a schema. 2. Validates that the field path id is not empty. + * + * @see Field + * Path V2 docs + */ +@Setter +@Getter +@Accessors(chain = true) +public class FieldPathValidator extends AspectPayloadValidator { + @Nonnull private AspectPluginConfig config; + + /** Prevent any MCP for SchemaMetadata where field ids are duplicated. 
*/ + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + + mcpItems.forEach( + i -> { + if (i.getAspectName().equals(SCHEMA_METADATA_ASPECT_NAME)) { + processSchemaMetadataAspect(i, exceptions); + } else { + processEditableSchemaMetadataAspect(i, exceptions); + } + }); + + return exceptions.streamAllExceptions(); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return Stream.of(); + } + + private static void processEditableSchemaMetadataAspect( + BatchItem i, ValidationExceptionCollection exceptions) { + final EditableSchemaMetadata schemaMetadata = i.getAspect(EditableSchemaMetadata.class); + final long uniquePaths = + validateAndCount( + i, + schemaMetadata.getEditableSchemaFieldInfo().stream() + .map(EditableSchemaFieldInfo::getFieldPath), + exceptions); + + if (uniquePaths != schemaMetadata.getEditableSchemaFieldInfo().size()) { + exceptions.addException( + i, + String.format( + "Cannot perform %s action on proposal. EditableSchemaMetadata aspect has duplicated field paths", + i.getChangeType())); + } + } + + private static void processSchemaMetadataAspect( + BatchItem i, ValidationExceptionCollection exceptions) { + final SchemaMetadata schemaMetadata = i.getAspect(SchemaMetadata.class); + final long uniquePaths = + validateAndCount( + i, schemaMetadata.getFields().stream().map(SchemaField::getFieldPath), exceptions); + + if (uniquePaths != schemaMetadata.getFields().size()) { + exceptions.addException( + i, + String.format( + "Cannot perform %s action on proposal. SchemaMetadata aspect has duplicated field paths", + i.getChangeType())); + } + } + + private static long validateAndCount( + BatchItem i, Stream fieldPaths, ValidationExceptionCollection exceptions) { + return fieldPaths + .distinct() + // inspect the stream of fieldPath validation errors since we're already iterating + .peek( + fieldPath -> + validateFieldPath(fieldPath) + .ifPresent(message -> exceptions.addException(i, message))) + .count(); + } + + private static Optional validateFieldPath(String fieldPath) { + if (fieldPath == null || fieldPath.isEmpty()) { + return Optional.of("SchemaMetadata aspect has empty field path."); + } + return Optional.empty(); + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/FieldPathValidatorTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/FieldPathValidatorTest.java new file mode 100644 index 0000000000000..bd5912764edce --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/FieldPathValidatorTest.java @@ -0,0 +1,233 @@ +package com.linkedin.metadata.aspect.validators; + +import static com.linkedin.metadata.Constants.*; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.validation.CreateIfNotExistsValidator; 
+import com.linkedin.metadata.aspect.validation.FieldPathValidator; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.schema.EditableSchemaFieldInfo; +import com.linkedin.schema.EditableSchemaFieldInfoArray; +import com.linkedin.schema.EditableSchemaMetadata; +import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaFieldArray; +import com.linkedin.schema.SchemaFieldDataType; +import com.linkedin.schema.SchemaMetadata; +import com.linkedin.schema.StringType; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nullable; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class FieldPathValidatorTest { + + private static final AspectPluginConfig validatorConfig = + AspectPluginConfig.builder() + .supportedOperations( + Arrays.stream(ChangeType.values()) + .map(Objects::toString) + .collect(Collectors.toList())) + .className(CreateIfNotExistsValidator.class.getName()) + .supportedEntityAspectNames(List.of(AspectPluginConfig.EntityAspectName.ALL)) + .enabled(true) + .build(); + private EntityRegistry entityRegistry; + private RetrieverContext mockRetrieverContext; + private static final DatasetUrn TEST_DATASET_URN; + private final FieldPathValidator test = new FieldPathValidator().setConfig(validatorConfig); + + static { + try { + TEST_DATASET_URN = + DatasetUrn.createFromUrn( + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,test,PROD)")); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + @BeforeTest + public void init() { + entityRegistry = new TestEntityRegistry(); + AspectRetriever mockAspectRetriever = mock(AspectRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + GraphRetriever mockGraphRetriever = mock(GraphRetriever.class); + mockRetrieverContext = mock(RetrieverContext.class); + when(mockRetrieverContext.getAspectRetriever()).thenReturn(mockAspectRetriever); + when(mockRetrieverContext.getGraphRetriever()).thenReturn(mockGraphRetriever); + } + + @Test + public void testValidateNonDuplicatedSchemaFieldPath() { + final SchemaMetadata schema = getMockSchemaMetadataAspect(false); + assertEquals( + test.validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_DATASET_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_DATASET_URN.getEntityType()) + .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .count(), + 0); + } + + @Test + public void testValidateDuplicatedSchemaFieldPath() { + final SchemaMetadata schema = getMockSchemaMetadataAspect(true); + + assertEquals( + test.validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_DATASET_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_DATASET_URN.getEntityType()) + .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .count(), + 1); + } + + @Test + public void testValidateNonDuplicatedEditableSchemaFieldPath() { + final 
EditableSchemaMetadata schema = getMockEditableSchemaMetadataAspect(false); + assertEquals( + test.validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_DATASET_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_DATASET_URN.getEntityType()) + .getAspectSpec(EDITABLE_SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .count(), + 0); + } + + @Test + public void testValidateDuplicatedEditableSchemaFieldPath() { + final EditableSchemaMetadata schema = getMockEditableSchemaMetadataAspect(true); + + assertEquals( + test.validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_DATASET_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_DATASET_URN.getEntityType()) + .getAspectSpec(EDITABLE_SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .count(), + 1); + } + + @Test + public void testEmptySchemaFieldPath() { + final SchemaMetadata schema = getMockSchemaMetadataAspect(false, ""); + TestMCP testItem = + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_DATASET_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_DATASET_URN.getEntityType()) + .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build(); + + Set exceptions = + test.validateProposed(Set.of(testItem), mockRetrieverContext).collect(Collectors.toSet()); + + assertEquals( + exceptions, + Set.of( + AspectValidationException.forItem( + testItem, "SchemaMetadata aspect has empty field path."))); + } + + private static SchemaMetadata getMockSchemaMetadataAspect(boolean duplicateFields) { + return getMockSchemaMetadataAspect(duplicateFields, null); + } + + private static SchemaMetadata getMockSchemaMetadataAspect( + boolean duplicateFields, @Nullable String fieldPath) { + List fields = new ArrayList<>(); + fields.add( + new SchemaField() + .setType( + new SchemaFieldDataType() + .setType(SchemaFieldDataType.Type.create(new StringType()))) + .setNullable(false) + .setNativeDataType("string") + .setFieldPath(fieldPath == null ? "test" : fieldPath)); + + if (duplicateFields) { + fields.add( + new SchemaField() + .setType( + new SchemaFieldDataType() + .setType(SchemaFieldDataType.Type.create(new StringType()))) + .setNullable(false) + .setNativeDataType("string") + .setFieldPath(fieldPath == null ? 
"test" : fieldPath)); + } + + return new SchemaMetadata() + .setPlatform(TEST_DATASET_URN.getPlatformEntity()) + .setFields(new SchemaFieldArray(fields)); + } + + private static EditableSchemaMetadata getMockEditableSchemaMetadataAspect( + boolean duplicateFields) { + + List fields = new ArrayList<>(); + fields.add(new EditableSchemaFieldInfo().setFieldPath("test")); + + if (duplicateFields) { + fields.add(new EditableSchemaFieldInfo().setFieldPath("test")); + } + + return new EditableSchemaMetadata() + .setEditableSchemaFieldInfo(new EditableSchemaFieldInfoArray(fields)); + } +} diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 6988dd4b3db3e..9cc9e7eac58ac 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -319,6 +319,13 @@ public class Constants { public static final String EXECUTION_REQUEST_INPUT_ASPECT_NAME = "dataHubExecutionRequestInput"; public static final String EXECUTION_REQUEST_SIGNAL_ASPECT_NAME = "dataHubExecutionRequestSignal"; public static final String EXECUTION_REQUEST_RESULT_ASPECT_NAME = "dataHubExecutionRequestResult"; + public static final String EXECUTION_REQUEST_STATUS_RUNNING = "RUNNING"; + public static final String EXECUTION_REQUEST_STATUS_FAILURE = "FAILURE"; + public static final String EXECUTION_REQUEST_STATUS_SUCCESS = "SUCCESS"; + public static final String EXECUTION_REQUEST_STATUS_TIMEOUT = "TIMEOUT"; + public static final String EXECUTION_REQUEST_STATUS_CANCELLED = "CANCELLED"; + public static final String EXECUTION_REQUEST_STATUS_ABORTED = "ABORTED"; + public static final String EXECUTION_REQUEST_STATUS_DUPLICATE = "DUPLICATE"; // DataHub Access Token public static final String ACCESS_TOKEN_KEY_ASPECT_NAME = "dataHubAccessTokenKey"; diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py index 98629ba030695..2b2f992249f1e 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py @@ -1,6 +1,7 @@ import datetime import logging -import uuid +import random +import string from typing import Any, Dict, List, Optional from pydantic import Field, validator @@ -71,6 +72,15 @@ class FlagsConfig(ConfigModel): ) +def _generate_run_id(source_type: Optional[str] = None) -> str: + current_time = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S") + random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=6)) + + if source_type is None: + source_type = "ingestion" + return f"{source_type}-{current_time}-{random_suffix}" + + class PipelineConfig(ConfigModel): source: SourceConfig sink: Optional[DynamicTypedConfig] = None @@ -91,12 +101,11 @@ def run_id_should_be_semantic( cls, v: Optional[str], values: Dict[str, Any], **kwargs: Any ) -> str: if v == DEFAULT_RUN_ID: + source_type = None if "source" in values and hasattr(values["source"], "type"): source_type = values["source"].type - current_time = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S") - return f"{source_type}-{current_time}" - return str(uuid.uuid1()) # default run_id if we cannot infer a source type + return _generate_run_id(source_type) else: assert v is not None return v diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py index 
61b222f8d2dd5..155773f9898b4 100644 --- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py +++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py @@ -27,6 +27,8 @@ r"root\[\d+\]\['aspect'\]\['json'\]\['lastUpdatedTimestamp'\]", r"root\[\d+\]\['aspect'\]\['json'\]\['created'\]", r"root\[\d+\]\['aspect'\]\['json'\]\['lastModified'\]", + r"root\[\d+\].*?\['systemMetadata'\]\['runId'\]", + r"root\[\d+\].*?\['systemMetadata'\]\['lastRunId'\]", ] @@ -82,6 +84,8 @@ def assert_metadata_files_equal( json_path = f"root[{i}]['aspect']['json'][{j}]['value']" ignore_paths = (*ignore_paths, re.escape(json_path)) + ignore_paths = (*ignore_paths, *default_exclude_paths) + diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order) if diff and update_golden: if isinstance(diff, MCPDiff) and diff.is_delta_valid: diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidator.java new file mode 100644 index 0000000000000..b77d3b48d5bd5 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidator.java @@ -0,0 +1,70 @@ +package com.linkedin.metadata.aspect.validation; + +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_ABORTED; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_CANCELLED; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_DUPLICATE; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_SUCCESS; + +import com.linkedin.execution.ExecutionRequestResult; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import java.util.Collection; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; + +/** Validator that blocks updates to the dataHubExecutionRequestResult aspect once it has reached a terminal status (SUCCESS, CANCELLED, ABORTED, DUPLICATE). 
*/ +@Setter +@Getter +@Slf4j +@Accessors(chain = true) +public class ExecutionRequestResultValidator extends AspectPayloadValidator { + private static final Set IMMUTABLE_STATUS = + Set.of( + EXECUTION_REQUEST_STATUS_ABORTED, + EXECUTION_REQUEST_STATUS_CANCELLED, + EXECUTION_REQUEST_STATUS_SUCCESS, + EXECUTION_REQUEST_STATUS_DUPLICATE); + + @Nonnull private AspectPluginConfig config; + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + return Stream.of(); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return changeMCPs.stream() + .filter(item -> item.getPreviousRecordTemplate() != null) + .map( + item -> { + ExecutionRequestResult existingResult = + item.getPreviousAspect(ExecutionRequestResult.class); + + if (IMMUTABLE_STATUS.contains(existingResult.getStatus())) { + ExecutionRequestResult currentResult = item.getAspect(ExecutionRequestResult.class); + return AspectValidationException.forItem( + item, + String.format( + "Invalid update to immutable state for aspect dataHubExecutionRequestResult. Execution urn: %s previous status: %s. Denied status update: %s", + item.getUrn(), existingResult.getStatus(), currentResult.getStatus())); + } + + return null; + }) + .filter(Objects::nonNull); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index f09a81c0c8b89..2d7db075e676f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -370,7 +370,7 @@ public AutoCompleteResult autoComplete( IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); AutocompleteRequestHandler builder = AutocompleteRequestHandler.getBuilder( - entitySpec, customSearchConfiguration, queryFilterRewriteChain); + entitySpec, customSearchConfiguration, queryFilterRewriteChain, searchConfiguration); SearchRequest req = builder.getSearchRequest( opContext, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index 294efb069a904..45359285b4a04 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.search.elasticsearch.query.request; -import static com.linkedin.metadata.models.SearchableFieldSpecExtractor.PRIMARY_URN_SEARCH_PROPERTIES; import static com.linkedin.metadata.search.utils.ESAccessControlUtil.restrictUrn; import static com.linkedin.metadata.search.utils.ESUtils.applyDefaultSearchFilters; @@ -8,6 +7,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import 
com.linkedin.metadata.config.search.custom.QueryConfiguration; @@ -35,6 +35,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.tuple.Pair; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.index.query.*; @@ -46,7 +47,7 @@ @Slf4j public class AutocompleteRequestHandler { - private final List _defaultAutocompleteFields; + private final List _defaultAutocompleteFields; private final Map> searchableFieldTypes; private static final Map @@ -56,11 +57,13 @@ public class AutocompleteRequestHandler { private final EntitySpec entitySpec; private final QueryFilterRewriteChain queryFilterRewriteChain; + private final SearchConfiguration searchConfiguration; public AutocompleteRequestHandler( @Nonnull EntitySpec entitySpec, @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain, + @Nonnull SearchConfiguration searchConfiguration) { this.entitySpec = entitySpec; List fieldSpecs = entitySpec.getSearchableFieldSpecs(); this.customizedQueryHandler = CustomizedQueryHandler.builder(customSearchConfiguration).build(); @@ -69,8 +72,12 @@ public AutocompleteRequestHandler( fieldSpecs.stream() .map(SearchableFieldSpec::getSearchableAnnotation) .filter(SearchableAnnotation::isEnableAutocomplete) - .map(SearchableAnnotation::getFieldName), - Stream.of("urn")) + .map( + searchableAnnotation -> + Pair.of( + searchableAnnotation.getFieldName(), + Double.toString(searchableAnnotation.getBoostScore()))), + Stream.of(Pair.of("urn", "1.0"))) .collect(Collectors.toList()); searchableFieldTypes = fieldSpecs.stream() @@ -87,17 +94,22 @@ public AutocompleteRequestHandler( return set1; })); this.queryFilterRewriteChain = queryFilterRewriteChain; + this.searchConfiguration = searchConfiguration; } public static AutocompleteRequestHandler getBuilder( @Nonnull EntitySpec entitySpec, @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain, + @Nonnull SearchConfiguration searchConfiguration) { return AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME.computeIfAbsent( entitySpec, k -> new AutocompleteRequestHandler( - entitySpec, customSearchConfiguration, queryFilterRewriteChain)); + entitySpec, + customSearchConfiguration, + queryFilterRewriteChain, + searchConfiguration)); } public SearchRequest getSearchRequest( @@ -169,7 +181,7 @@ private BoolQueryBuilder getQuery( public BoolQueryBuilder getQuery( @Nonnull ObjectMapper objectMapper, @Nullable AutocompleteConfiguration customAutocompleteConfig, - List autocompleteFields, + List autocompleteFields, @Nonnull String query) { BoolQueryBuilder finalQuery = @@ -189,7 +201,7 @@ public BoolQueryBuilder getQuery( private Optional getAutocompleteQuery( @Nullable AutocompleteConfiguration customConfig, - List autocompleteFields, + List autocompleteFields, @Nonnull String query) { Optional result = Optional.empty(); @@ -200,33 +212,39 @@ private Optional getAutocompleteQuery( return result; } - private static BoolQueryBuilder defaultQuery( - List autocompleteFields, @Nonnull String query) { + private BoolQueryBuilder defaultQuery(List autocompleteFields, @Nonnull String query) { BoolQueryBuilder finalQuery = QueryBuilders.boolQuery().minimumShouldMatch(1); // Search for exact matches 
with higher boost and ngram matches - MultiMatchQueryBuilder autocompleteQueryBuilder = + MultiMatchQueryBuilder multiMatchQueryBuilder = QueryBuilders.multiMatchQuery(query).type(MultiMatchQueryBuilder.Type.BOOL_PREFIX); - final float urnBoost = - Float.parseFloat((String) PRIMARY_URN_SEARCH_PROPERTIES.get("boostScore")); autocompleteFields.forEach( - fieldName -> { - if ("urn".equals(fieldName)) { - autocompleteQueryBuilder.field(fieldName + ".ngram", urnBoost); - autocompleteQueryBuilder.field(fieldName + ".ngram._2gram", urnBoost); - autocompleteQueryBuilder.field(fieldName + ".ngram._3gram", urnBoost); - autocompleteQueryBuilder.field(fieldName + ".ngram._4gram", urnBoost); - } else { - autocompleteQueryBuilder.field(fieldName + ".ngram"); - autocompleteQueryBuilder.field(fieldName + ".ngram._2gram"); - autocompleteQueryBuilder.field(fieldName + ".ngram._3gram"); - autocompleteQueryBuilder.field(fieldName + ".ngram._4gram"); + pair -> { + final String fieldName = (String) pair.getLeft(); + final float boostScore = Float.parseFloat((String) pair.getRight()); + multiMatchQueryBuilder.field(fieldName + ".ngram"); + multiMatchQueryBuilder.field(fieldName + ".ngram._2gram"); + multiMatchQueryBuilder.field(fieldName + ".ngram._3gram"); + multiMatchQueryBuilder.field(fieldName + ".ngram._4gram"); + multiMatchQueryBuilder.field(fieldName + ".delimited"); + if (!fieldName.equalsIgnoreCase("urn")) { + multiMatchQueryBuilder.field(fieldName + ".ngram", boostScore); + multiMatchQueryBuilder.field( + fieldName + ".ngram._2gram", + boostScore * (searchConfiguration.getWordGram().getTwoGramFactor())); + multiMatchQueryBuilder.field( + fieldName + ".ngram._3gram", + boostScore * (searchConfiguration.getWordGram().getThreeGramFactor())); + multiMatchQueryBuilder.field( + fieldName + ".ngram._4gram", + boostScore * (searchConfiguration.getWordGram().getFourGramFactor())); + finalQuery.should( + QueryBuilders.matchQuery(fieldName + ".keyword", query).boost(boostScore)); } - autocompleteQueryBuilder.field(fieldName + ".delimited"); finalQuery.should(QueryBuilders.matchPhrasePrefixQuery(fieldName + ".delimited", query)); }); - finalQuery.should(autocompleteQueryBuilder); + finalQuery.should(multiMatchQueryBuilder); return finalQuery; } @@ -241,12 +259,17 @@ private HighlightBuilder getHighlights(@Nullable String field) { // Check for each field name and any subfields getAutocompleteFields(field) .forEach( - fieldName -> - highlightBuilder - .field(fieldName) - .field(fieldName + ".*") - .field(fieldName + ".ngram") - .field(fieldName + ".delimited")); + pair -> { + final String fieldName = (String) pair.getLeft(); + highlightBuilder + .field(fieldName) + .field(fieldName + ".*") + .field(fieldName + ".ngram") + .field(fieldName + ".delimited"); + if (!fieldName.equalsIgnoreCase("urn")) { + highlightBuilder.field(fieldName + ".keyword"); + } + }); // set field match req false for ngram highlightBuilder.fields().stream() @@ -256,9 +279,9 @@ private HighlightBuilder getHighlights(@Nullable String field) { return highlightBuilder; } - private List getAutocompleteFields(@Nullable String field) { - if (field != null && !field.isEmpty()) { - return ImmutableList.of(field); + private List getAutocompleteFields(@Nullable String field) { + if (field != null && !field.isEmpty() && !field.equalsIgnoreCase("urn")) { + return ImmutableList.of(Pair.of(field, "10.0")); } return _defaultAutocompleteFields; } diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidatorTest.java new file mode 100644 index 0000000000000..f46772ca7b350 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/ExecutionRequestResultValidatorTest.java @@ -0,0 +1,166 @@ +package com.linkedin.metadata.aspect.validation; + +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_ENTITY_NAME; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_RESULT_ASPECT_NAME; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_ABORTED; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_CANCELLED; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_DUPLICATE; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_FAILURE; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_RUNNING; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_SUCCESS; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_STATUS_TIMEOUT; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.execution.ExecutionRequestResult; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import com.linkedin.metadata.utils.AuditStampUtils; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.testng.annotations.Test; + +public class ExecutionRequestResultValidatorTest { + private static final OperationContext TEST_CONTEXT = + TestOperationContexts.systemContextNoSearchAuthorization(); + private static final AspectPluginConfig TEST_PLUGIN_CONFIG = + AspectPluginConfig.builder() + .className(ExecutionRequestResultValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of("UPSERT")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(EXECUTION_REQUEST_ENTITY_NAME) + .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME) + .build())) + .build(); + private static final Urn TEST_URN = UrnUtils.getUrn("urn:li:dataHubExecutionRequest:xyz"); + + @Test + public void testAllowed() { + ExecutionRequestResultValidator test = new ExecutionRequestResultValidator(); + test.setConfig(TEST_PLUGIN_CONFIG); + + Set allowedUpdateStates = + Set.of( + EXECUTION_REQUEST_STATUS_RUNNING, + EXECUTION_REQUEST_STATUS_FAILURE, + EXECUTION_REQUEST_STATUS_TIMEOUT); + Set destinationStates = new HashSet<>(allowedUpdateStates); + destinationStates.addAll( + Set.of( + EXECUTION_REQUEST_STATUS_ABORTED, + EXECUTION_REQUEST_STATUS_CANCELLED, + EXECUTION_REQUEST_STATUS_SUCCESS, + EXECUTION_REQUEST_STATUS_DUPLICATE)); + + List testItems = + new 
ArrayList<>( + // Tests with previous state + allowedUpdateStates.stream() + .flatMap( + prevState -> + destinationStates.stream() + .map( + destState -> { + SystemAspect prevData = mock(SystemAspect.class); + when(prevData.getRecordTemplate()) + .thenReturn( + new ExecutionRequestResult().setStatus(prevState)); + return ChangeItemImpl.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_URN) + .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME) + .recordTemplate( + new ExecutionRequestResult().setStatus(destState)) + .previousSystemAspect(prevData) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(TEST_CONTEXT.getAspectRetriever()); + })) + .toList()); + // Tests with no previous + testItems.addAll( + destinationStates.stream() + .map( + destState -> + ChangeItemImpl.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_URN) + .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME) + .recordTemplate(new ExecutionRequestResult().setStatus(destState)) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(TEST_CONTEXT.getAspectRetriever())) + .toList()); + + List result = + test.validatePreCommitAspects(testItems, mock(RetrieverContext.class)).toList(); + + assertTrue(result.isEmpty(), "Did not expect any validation errors."); + } + + @Test + public void testDenied() { + ExecutionRequestResultValidator test = new ExecutionRequestResultValidator(); + test.setConfig(TEST_PLUGIN_CONFIG); + + Set deniedUpdateStates = + Set.of( + EXECUTION_REQUEST_STATUS_ABORTED, + EXECUTION_REQUEST_STATUS_CANCELLED, + EXECUTION_REQUEST_STATUS_SUCCESS, + EXECUTION_REQUEST_STATUS_DUPLICATE); + Set destinationStates = new HashSet<>(deniedUpdateStates); + destinationStates.addAll( + Set.of( + EXECUTION_REQUEST_STATUS_RUNNING, + EXECUTION_REQUEST_STATUS_FAILURE, + EXECUTION_REQUEST_STATUS_TIMEOUT)); + + List testItems = + new ArrayList<>( + // Tests with previous state + deniedUpdateStates.stream() + .flatMap( + prevState -> + destinationStates.stream() + .map( + destState -> { + SystemAspect prevData = mock(SystemAspect.class); + when(prevData.getRecordTemplate()) + .thenReturn( + new ExecutionRequestResult().setStatus(prevState)); + return ChangeItemImpl.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_URN) + .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME) + .recordTemplate( + new ExecutionRequestResult().setStatus(destState)) + .previousSystemAspect(prevData) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(TEST_CONTEXT.getAspectRetriever()); + })) + .toList()); + + List result = + test.validatePreCommitAspects(testItems, mock(RetrieverContext.class)).toList(); + + assertEquals( + result.size(), + deniedUpdateStates.size() * destinationStates.size(), + "Expected ALL items to be denied."); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index bc3c892e07b1b..504eb5f5fc13d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -283,7 +283,7 @@ public void testFixtureInitialization() { Map.of( "dataset", 13, "chart", 0, - "container", 1, + "container", 2, "dashboard", 0, "tag", 0, "mlmodel", 0); @@ -903,6 +903,26 @@ public void testContainerAutoComplete() { }); } + @Test + public void testContainerAutoComplete_with_exactMatch_onTop() { + 
List.of("container") + .forEach( + query -> { + try { + AutoCompleteResults result = + autocomplete( + getOperationContext(), new ContainerType(getEntityClient()), query); + assertTrue( + result.getSuggestions().get(0).equals("container"), + String.format( + "Expected query:`%s` on top of suggestions, found %s", + query, result.getSuggestions().get(0))); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } + @Test public void testGroupAutoComplete() { List.of("T", "Te", "Tes", "Test ", "Test G", "Test Gro", "Test Group ") diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index 572d79ebf2f0c..c5205906e9d37 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -5,6 +5,10 @@ import static org.testng.Assert.assertTrue; import com.linkedin.metadata.TestEntitySpecBuilder; +import com.linkedin.metadata.config.search.ExactMatchConfiguration; +import com.linkedin.metadata.config.search.PartialConfiguration; +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.WordGramConfiguration; import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; @@ -32,14 +36,44 @@ import org.testng.annotations.Test; public class AutocompleteRequestHandlerTest { - private AutocompleteRequestHandler handler = - AutocompleteRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - CustomSearchConfiguration.builder().build(), - QueryFilterRewriteChain.EMPTY); + private static SearchConfiguration testQueryConfig; + private static AutocompleteRequestHandler handler; private OperationContext mockOpContext = TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class)); + static { + testQueryConfig = new SearchConfiguration(); + testQueryConfig.setMaxTermBucketSize(20); + + ExactMatchConfiguration exactMatchConfiguration = new ExactMatchConfiguration(); + exactMatchConfiguration.setExclusive(false); + exactMatchConfiguration.setExactFactor(10.0f); + exactMatchConfiguration.setWithPrefix(true); + exactMatchConfiguration.setPrefixFactor(6.0f); + exactMatchConfiguration.setCaseSensitivityFactor(0.7f); + exactMatchConfiguration.setEnableStructured(true); + + WordGramConfiguration wordGramConfiguration = new WordGramConfiguration(); + wordGramConfiguration.setTwoGramFactor(1.2f); + wordGramConfiguration.setThreeGramFactor(1.5f); + wordGramConfiguration.setFourGramFactor(1.8f); + + PartialConfiguration partialConfiguration = new PartialConfiguration(); + partialConfiguration.setFactor(0.4f); + partialConfiguration.setUrnFactor(0.7f); + + testQueryConfig.setExactMatch(exactMatchConfiguration); + testQueryConfig.setWordGram(wordGramConfiguration); + testQueryConfig.setPartial(partialConfiguration); + + handler = + AutocompleteRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), + CustomSearchConfiguration.builder().build(), + QueryFilterRewriteChain.EMPTY, + testQueryConfig); + } + private static final QueryConfiguration TEST_QUERY_CONFIG = QueryConfiguration.builder() .queryRegex(".*") @@ -88,9 +122,12 @@ 
public void testDefaultAutocompleteRequest() { BoolQueryBuilder wrapper = (BoolQueryBuilder) ((FunctionScoreQueryBuilder) sourceBuilder.query()).query(); BoolQueryBuilder query = (BoolQueryBuilder) extractNestedQuery(wrapper); - assertEquals(query.should().size(), 3); + assertEquals(query.should().size(), 4); - MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(2); + MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) query.should().get(0); + assertEquals("keyPart1.keyword", matchQueryBuilder.fieldName()); + + MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(3); Map queryFields = autocompleteQuery.fields(); assertTrue(queryFields.containsKey("keyPart1.ngram")); assertTrue(queryFields.containsKey("keyPart1.ngram._2gram")); @@ -99,7 +136,7 @@ public void testDefaultAutocompleteRequest() { assertEquals(autocompleteQuery.type(), MultiMatchQueryBuilder.Type.BOOL_PREFIX); MatchPhrasePrefixQueryBuilder prefixQuery = - (MatchPhrasePrefixQueryBuilder) query.should().get(0); + (MatchPhrasePrefixQueryBuilder) query.should().get(1); assertEquals("keyPart1.delimited", prefixQuery.fieldName()); assertEquals(wrapper.mustNot().size(), 1); @@ -108,15 +145,16 @@ public void testDefaultAutocompleteRequest() { assertEquals(removedFilter.value(), true); HighlightBuilder highlightBuilder = sourceBuilder.highlighter(); List highlightedFields = highlightBuilder.fields(); - assertEquals(highlightedFields.size(), 8); + assertEquals(highlightedFields.size(), 9); assertEquals(highlightedFields.get(0).name(), "keyPart1"); assertEquals(highlightedFields.get(1).name(), "keyPart1.*"); assertEquals(highlightedFields.get(2).name(), "keyPart1.ngram"); assertEquals(highlightedFields.get(3).name(), "keyPart1.delimited"); - assertEquals(highlightedFields.get(4).name(), "urn"); - assertEquals(highlightedFields.get(5).name(), "urn.*"); - assertEquals(highlightedFields.get(6).name(), "urn.ngram"); - assertEquals(highlightedFields.get(7).name(), "urn.delimited"); + assertEquals(highlightedFields.get(4).name(), "keyPart1.keyword"); + assertEquals(highlightedFields.get(5).name(), "urn"); + assertEquals(highlightedFields.get(6).name(), "urn.*"); + assertEquals(highlightedFields.get(7).name(), "urn.ngram"); + assertEquals(highlightedFields.get(8).name(), "urn.delimited"); } @Test @@ -130,9 +168,12 @@ public void testAutocompleteRequestWithField() { (BoolQueryBuilder) ((FunctionScoreQueryBuilder) sourceBuilder.query()).query(); assertEquals(wrapper.should().size(), 1); BoolQueryBuilder query = (BoolQueryBuilder) extractNestedQuery(wrapper); - assertEquals(query.should().size(), 2); + assertEquals(query.should().size(), 3); - MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(1); + MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) query.should().get(0); + assertEquals("field.keyword", matchQueryBuilder.fieldName()); + + MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(2); Map queryFields = autocompleteQuery.fields(); assertTrue(queryFields.containsKey("field.ngram")); assertTrue(queryFields.containsKey("field.ngram._2gram")); @@ -141,7 +182,7 @@ public void testAutocompleteRequestWithField() { assertEquals(autocompleteQuery.type(), MultiMatchQueryBuilder.Type.BOOL_PREFIX); MatchPhrasePrefixQueryBuilder prefixQuery = - (MatchPhrasePrefixQueryBuilder) query.should().get(0); + (MatchPhrasePrefixQueryBuilder) query.should().get(1); assertEquals("field.delimited", 
prefixQuery.fieldName()); MatchQueryBuilder removedFilter = (MatchQueryBuilder) wrapper.mustNot().get(0); @@ -149,11 +190,12 @@ public void testAutocompleteRequestWithField() { assertEquals(removedFilter.value(), true); HighlightBuilder highlightBuilder = sourceBuilder.highlighter(); List highlightedFields = highlightBuilder.fields(); - assertEquals(highlightedFields.size(), 4); + assertEquals(highlightedFields.size(), 5); assertEquals(highlightedFields.get(0).name(), "field"); assertEquals(highlightedFields.get(1).name(), "field.*"); assertEquals(highlightedFields.get(2).name(), "field.ngram"); assertEquals(highlightedFields.get(3).name(), "field.delimited"); + assertEquals(highlightedFields.get(4).name(), "field.keyword"); } @Test @@ -174,7 +216,8 @@ public void testCustomConfigWithDefault() { .build()) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); SearchRequest autocompleteRequest = withoutDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -200,7 +243,8 @@ public void testCustomConfigWithDefault() { .build()) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); autocompleteRequest = withDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); @@ -215,7 +259,7 @@ public void testCustomConfigWithDefault() { BoolQueryBuilder defaultQuery = (BoolQueryBuilder) shouldQueries.stream().filter(qb -> qb instanceof BoolQueryBuilder).findFirst().get(); - assertEquals(defaultQuery.should().size(), 3); + assertEquals(defaultQuery.should().size(), 4); // Custom customQuery = @@ -243,7 +287,8 @@ public void testCustomConfigWithInheritedQueryFunctionScores() { .build()) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); SearchRequest autocompleteRequest = withInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -282,7 +327,8 @@ public void testCustomConfigWithInheritedQueryFunctionScores() { .build()) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); autocompleteRequest = noQueryCustomization.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -345,7 +391,8 @@ public void testCustomConfigWithFunctionScores() { "deprecated", Map.of("value", false))))))) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); SearchRequest autocompleteRequest = explicitNoInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -398,7 +445,8 @@ public void testCustomConfigWithFunctionScores() { "deprecated", Map.of("value", false))))))) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); autocompleteRequest = explicit.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); @@ -411,7 +459,7 @@ public void testCustomConfigWithFunctionScores() { assertEquals(customQuery, QueryBuilders.matchAllQuery()); // standard query still present - assertEquals(((BoolQueryBuilder) query.should().get(1)).should().size(), 3); + assertEquals(((BoolQueryBuilder) query.should().get(1)).should().size(), 4); // custom functions included assertEquals(wrapper.filterFunctionBuilders(), expectedCustomScoreFunctions); diff --git a/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz 
b/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz index 2fa49c810abfa..bd36747255f86 100644 Binary files a/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz and b/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz differ diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 21403191733bb..88b7996412755 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -674,6 +674,12 @@ plugins: supportedEntityAspectNames: - entityName: '*' aspectName: '*' + - className: 'com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator' + enabled: true + spring: + enabled: true + packageScan: + - com.linkedin.gms.factory.plugins mcpSideEffects: - className: 'com.linkedin.metadata.structuredproperties.hooks.PropertyDefinitionDeleteSideEffect' packageScan: diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java index 4a2095685abe1..943b1c7184a60 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java @@ -1,5 +1,8 @@ package com.linkedin.gms.factory.plugins; +import static com.linkedin.metadata.Constants.EDITABLE_SCHEMA_METADATA_ASPECT_NAME; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_ENTITY_NAME; +import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_RESULT_ASPECT_NAME; import static com.linkedin.metadata.Constants.SCHEMA_METADATA_ASPECT_NAME; import com.linkedin.metadata.Constants; @@ -7,6 +10,9 @@ import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect; import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.validation.ExecutionRequestResultValidator; +import com.linkedin.metadata.aspect.validation.FieldPathValidator; import com.linkedin.metadata.dataproducts.sideeffects.DataProductUnsetSideEffect; import com.linkedin.metadata.schemafields.sideeffects.SchemaFieldSideEffect; import com.linkedin.metadata.timeline.eventgenerator.EntityChangeEventGeneratorRegistry; @@ -21,6 +27,7 @@ @Configuration @Slf4j public class SpringStandardPluginConfiguration { + private static final String ALL = "*"; @Value("${metadataChangeProposal.validation.ignoreUnknown}") private boolean ignoreUnknownEnabled; @@ -104,4 +111,43 @@ public MCPSideEffect dataProductUnsetSideEffect() { log.info("Initialized {}", SchemaFieldSideEffect.class.getName()); return new DataProductUnsetSideEffect().setConfig(config); } + + @Bean + public AspectPayloadValidator fieldPathValidator() { + return new FieldPathValidator() + .setConfig( + AspectPluginConfig.builder() + .className(FieldPathValidator.class.getName()) + .enabled(true) + .supportedOperations( + List.of("CREATE", "CREATE_ENTITY", "UPSERT", "UPDATE", "RESTATE")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(ALL) + .aspectName(SCHEMA_METADATA_ASPECT_NAME) + .build(), + 
AspectPluginConfig.EntityAspectName.builder() + .entityName(ALL) + .aspectName(EDITABLE_SCHEMA_METADATA_ASPECT_NAME) + .build())) + .build()); + } + + @Bean + public AspectPayloadValidator dataHubExecutionRequestResultValidator() { + return new ExecutionRequestResultValidator() + .setConfig( + AspectPluginConfig.builder() + .className(ExecutionRequestResultValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of("UPSERT", "UPDATE")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(EXECUTION_REQUEST_ENTITY_NAME) + .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME) + .build())) + .build()); + } }
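
Note (not part of the patch): a minimal usage sketch of the new FieldPathValidator, showing how the duplicate-field-path rule called out in docs/how/updating-datahub.md (#11619) surfaces as a validation error. It is modeled directly on FieldPathValidatorTest above and assumes that test's setup (entityRegistry from TestEntityRegistry, mockRetrieverContext, TEST_DATASET_URN, and the TestMCP helper); the "user_id" field name is illustrative.

// Usage sketch only -- mirrors FieldPathValidatorTest in this patch; helpers are assumed from that test.
FieldPathValidator validator =
    new FieldPathValidator()
        .setConfig(
            AspectPluginConfig.builder()
                .className(FieldPathValidator.class.getName())
                .enabled(true)
                .supportedOperations(List.of("UPSERT"))
                .supportedEntityAspectNames(List.of(AspectPluginConfig.EntityAspectName.ALL))
                .build());

// Two schema fields sharing the path "user_id" -- the shape that #11619 now rejects.
SchemaField field =
    new SchemaField()
        .setFieldPath("user_id")
        .setNullable(false)
        .setNativeDataType("string")
        .setType(
            new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType())));
SchemaMetadata schema =
    new SchemaMetadata()
        .setPlatform(TEST_DATASET_URN.getPlatformEntity())
        .setFields(new SchemaFieldArray(List.of(field, field)));

long errors =
    validator
        .validateProposed(
            Set.of(
                TestMCP.builder()
                    .changeType(ChangeType.UPSERT)
                    .urn(TEST_DATASET_URN)
                    .entitySpec(entityRegistry.getEntitySpec(TEST_DATASET_URN.getEntityType()))
                    .aspectSpec(
                        entityRegistry
                            .getEntitySpec(TEST_DATASET_URN.getEntityType())
                            .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME))
                    .recordTemplate(schema)
                    .build()),
            mockRetrieverContext)
        .count();
// errors == 1: "Cannot perform UPSERT action on proposal. SchemaMetadata aspect has duplicated field paths"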
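
Similarly, a sketch of the terminal-state guard added by ExecutionRequestResultValidator, modeled on ExecutionRequestResultValidatorTest above. TEST_CONTEXT and TEST_PLUGIN_CONFIG are the fixtures defined in that test, the Mockito static imports are assumed, and the execution request URN is illustrative.

// Usage sketch only -- mirrors ExecutionRequestResultValidatorTest in this patch.
ExecutionRequestResultValidator validator = new ExecutionRequestResultValidator();
validator.setConfig(TEST_PLUGIN_CONFIG);

// The previously persisted result is SUCCESS, which the validator treats as immutable.
SystemAspect previous = mock(SystemAspect.class);
when(previous.getRecordTemplate())
    .thenReturn(new ExecutionRequestResult().setStatus(EXECUTION_REQUEST_STATUS_SUCCESS));

// An attempt to flip the finished request back to RUNNING.
ChangeMCP attemptedUpdate =
    ChangeItemImpl.builder()
        .changeType(ChangeType.UPSERT)
        .urn(UrnUtils.getUrn("urn:li:dataHubExecutionRequest:xyz"))
        .aspectName(EXECUTION_REQUEST_RESULT_ASPECT_NAME)
        .recordTemplate(new ExecutionRequestResult().setStatus(EXECUTION_REQUEST_STATUS_RUNNING))
        .previousSystemAspect(previous)
        .auditStamp(AuditStampUtils.createDefaultAuditStamp())
        .build(TEST_CONTEXT.getAspectRetriever());

// Exactly one AspectValidationException: "Invalid update to immutable state for aspect dataHubExecutionRequestResult ..."
long denied =
    validator
        .validatePreCommitAspects(List.of(attemptedUpdate), mock(RetrieverContext.class))
        .count();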