diff --git a/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx b/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx index c8d4188886750..f624d7e6ca796 100644 --- a/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx +++ b/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx @@ -17,6 +17,7 @@ import { getStructuredReport, RUNNING, SUCCESS, + SUCCEEDED_WITH_WARNINGS, } from '../utils'; import { ExecutionRequestResult } from '../../../../types.generated'; import { StructuredReport } from './reporting/StructuredReport'; @@ -190,7 +191,7 @@ export const ExecutionDetailsModal = ({ urn, open, onClose }: Props) => { {resultSummaryText} {structuredReport ? : null} - {status === SUCCESS && ( + {(status === SUCCESS || status === SUCCEEDED_WITH_WARNINGS) && ( {data?.executionRequest?.id && } diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index e55ce64040160..2c9235e5196f7 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -331,6 +331,7 @@ module.exports = { }, { "DataHub Cloud Release History": [ + "docs/managed-datahub/release-notes/v_0_3_7", "docs/managed-datahub/release-notes/v_0_3_6", "docs/managed-datahub/release-notes/v_0_3_5", "docs/managed-datahub/release-notes/v_0_3_4", diff --git a/docs/managed-datahub/release-notes/v_0_3_7.md b/docs/managed-datahub/release-notes/v_0_3_7.md new file mode 100644 index 0000000000000..cc01ceb52c8d8 --- /dev/null +++ b/docs/managed-datahub/release-notes/v_0_3_7.md @@ -0,0 +1,117 @@ +# v0.3.7 +--- + +Release Availability Date +--- +13-Nov-2024 + +Recommended CLI/SDK +--- +- `v0.14.1.7` with release notes at https://github.com/datahub-project/datahub/releases/tag/v0.14.1.7 + +If you are using an older CLI/SDK version, then please upgrade it. 
This applies to all CLI/SDK usages, whether you use it through your terminal, GitHub Actions, Airflow, the Python SDK, the Java SDK, etc. This is a strong recommendation to upgrade, as we keep on pushing fixes in the CLI, and it helps us support you better. + +## Release Changelog +--- + +- All changes in https://github.com/datahub-project/datahub/releases/tag/v0.14.1 + - Note Breaking Changes: https://datahubproject.io/docs/how/updating-datahub/#0141 + +- Breaking Changes + - Authentication & RestAPI Authorization enabled by default (since v0.3.6) + - Helm Chart Requirement: 1.4.132+ + - Recommend setting timezone for `datahub-gc` and `datahub-usage-reporting` + - ```yaml + acryl-datahub: + global: + datahub: + timezone: 'America/Los_Angeles' + ``` + - #11486 - Deprecated Criterion filters using `value`. Use `values` instead. This also deprecates the ability to use a comma-delimited string to represent multiple values using `value`. + - #10472 - `SANDBOX` added as a FabricType. No rollbacks allowed once metadata with this fabric type is added without manual cleanups in databases. + - #11619 - schema field/column paths can no longer be empty strings + - #11619 - schema field/column paths can no longer be duplicated within the schema + - #11570 - The `DatahubClientConfig`'s server field no longer defaults to `http://localhost:8080`. Be sure to explicitly set this. + - #11570 - If a `datahub_api` is explicitly passed to a stateful ingestion config provider, it will be used. We previously ignored it if the pipeline context also had a graph object. + - #11518 - DataHub Garbage Collection: Various entities that are soft-deleted (after 10d) or are timeseries *entities* (dataprocess, execution requests) will be removed automatically using logic in the `datahub-gc` ingestion source. 
+ +- Bug Fixes + - [UI] Fix a bug in displaying the filter value counts when selecting filters on the primary search experience + - [UI] Fix unnecessary horizontal scrolling on wide markdown documentation. + - [UI] Fix bug in siblings external URLs. Now showing both the dbt and Snowflake URLs as separate, correct URLs. + - [UI] Fix bug on listing data product assets with View applied + - [UI] Fix siblings bug in Schema Field queries tab + - [UI] Handle edge cycles in lineage graph more correctly + - [UI] Hide incorrect "Lineage" sidebar section on sibling pages (incorrect merge) + - [UI] Miscellaneous fixes to **Automations** forms UI - creating and editing automations. + - [UI] Fix scrolling to the end of the list of tabs on Asset Profiles + - [UI] Fix Compact View preview on Hover Card (looked squished!) + - [UI] Update asset counts on Domain profile pages immediately after adding and removing assets + - [UI] Improve support for Compliance Forms and Structured Properties on sibling asset profile pages + - [Automations] **Column Description Propagation**: Fix Column Description Propagation issue where column description would not propagate if self-lineage was stored in graph index + - [Automations] **Snowflake Tag Sync**: Fix bug in Snowflake Tag Sync that failed to sync to columns with special characters + +- Product + - [BETA] Introducing the **BigQuery Metadata Sync Automation** to sync tags, glossary terms, and descriptions from DataHub to BigQuery. Check out the [feature guide](https://datahubproject.io/docs/automations/bigquery-metadata-sync/) for more information. To enable this BETA feature, reach out to your Acryl representative. + - [BETA] Introducing the **AI Classification Automation** to automatically classify your tables & columns using your organization's custom glossary terms. Check out the [feature guide](https://datahubproject.io/docs/automations/ai-term-suggestion) for more information. 
To enable this BETA feature, reach out to your Acryl representative. + - [BETA] A new way to visualize Column-Level Lineage, focused on a single column. Accessible by clicking on a column name in the column details sidebar or by clicking on the "Explore complete column lineage" button on a column in the regular lineage visualization. This will allow you to view only the upstreams and downstreams of the specific column being viewed. Please reach out to your Acryl representative to enable this feature. + - [BETA] Support running Automations via a Remote Executor using an Executor ID. This is currently in Beta, please reach out to your Acryl representative for more information. + - [BETA] Support plugging in custom Mixpanel or Google Analytics Measurement ID (GA4) to DataHub. Reach out to your Acryl representative for more information. + - Introducing **Structured Properties** UI. Create and manage custom properties for all asset types via the DataHub UI under **Govern** > **Structured Properties**. Feature guide will be coming in v0.3.8 - reach out to your Acryl representative for more information. Requires the `Manage Structured Properties` privilege to edit, `View Structured Properties` privilege to view. + - Introducing **Compliance Forms** UI. Create and manage compliance forms to run large-scale metadata collection initiatives inside your organization. Supported for all asset types via the DataHub UI under **Govern** > **Compliance Forms**. Feature guide will be coming in v0.3.8 - reach out to your Acryl representative for more information. Requires the `Manage Compliance Forms` privilege to edit, `View Compliance Forms` privilege to view. Compliance Forms also support analytics, which are updated once per day by default. 
+ - Support adding and removing structured properties from Table & Column Properties Tab + - Support filtering by Structured Properties in the search UI (main search only, not on lists yet) + - Acryl 2.0 is enabled by default for all users who have not explicitly set their display preference via **Settings** > **Appearance**. + - Support searching the visible lineage graph by asset name + - Support showing 'all' assets in a downstream or upstream lineage level in one click + - Support searching the assets hidden by a collapsed, "show more" node + - On lineage graph, draw an arrow from a column to the "show more" node if that column has lineage to a hidden node + - On lineage graph, add control to show lineage edges to entities that are deleted / do not exist + - Support deleting Data Product from the Data Product page + - Support viewing & editing documentation in full-screen mode + - Support copying queries for View Definitions (sidebar + tabs) + - Support V2 UI with Chrome Extension, fix miscellaneous bugs related to documentations, glossary, and lineage interactions. + - Minor UX improvements (alignments, etc) to Quality, Assertions tabs. + - Reorder the asset sidebar sections to prioritize documentation & lineage, the most used features. Moved down status, and share related tabs. + - Add "Total Views" and "Recent Views" statistics to Dashboard & Chart asset sidebar header. + - Ingestion UI: Always display the number of assets ingested on "Failure" & "Succeeded With Warnings" + - Permissions: Hide **Settings** > **Access Tokens** page if user doesn't have the `Generate Access Tokens` privilege. 
+ - Add a Properties tab to Asset sidebar + - Hide the 'notes' icon from the Columns table on Dataset Profiles, only show in the Column sidebar + - Add Properties Count, Column Count, Incident Count to Asset Profile tab names + - Allow resizing of the browse sidebar + - Display custom Assertion Error messages via the UI + - Add sorting to Columns table + - Add description to "hover preview" of assets + - Rename 'Inbox' navigation item to 'Tasks' to align with rebranding as 'Task Center' + - Support viewing correctly merged schema change history for sibling pages + - Minor UX improvements on lineage graph + - Minor UX improvements on Glossary, Search Cards, Home page, Subscriptions tab, and more. + - Improved usage-based search ranking. Please reach out with any questions or concerns + - Improved UX for setting up and managing SSO + +- Ingestion changes + - In addition to the improvements listed here: https://github.com/datahub-project/datahub/releases/tag/v0.14.1.7 + - PowerBI: Support for PowerBI Apps and cross-workspace lineage + - Fivetran: Major improvements to configurability and improved reliability with large Fivetran setups + - Snowflake & BigQuery: Improved handling of temporary tables and swap statements when generating lineage + - [Beta] Preset integration + +- Platform changes + - Added datahub-usage-reporting job to calculate usage metrics for search ranking + - Metadata Test performance improvements: async ingestion & tag patch support + - Authentication & RestAPI Authorization enabled by default + - Added datahub-gc and datahub-usage-reporting SYSTEM ingestion sources + - Added sweeper to executor to cancel duplicate and stale ingestion jobs + - Added soft delete status to edges in graph store + - Added service side options for newer clients with older service + - ALTERNATE_MCP_VALIDATION=true + - MCP_VALIDATION_IGNORE_UNKNOWN=true + - OpenAPIv3 + - Added generic entities scroll endpoint + - Added `async` and `createIfNotExists` on aspect endpoints + 
- System Operations privilege extended to all system operations + - [BETA] Introduce Entity Change Events Poll API behind permission "Get Platform Events". This enables programmatic access to entity change events in DataHub. Reach out to your Acryl representative for more information. + - (system / internal) Exclude form-prompt tests in live Metadata Tests evaluation + - (system / internal) Exclude form-prompt tests in stored Metadata Test results + - Elasticsearch reindex time limit of 8h removed diff --git a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py index 1cb736bb1ba83..2ad301a38d002 100644 --- a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py +++ b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py @@ -66,7 +66,7 @@ has_name_positional_arg = packaging.version.parse( GX_VERSION - ) >= packaging.version.Version("0.18.0") + ) >= packaging.version.Version("0.18.14") except Exception: has_name_positional_arg = False @@ -89,7 +89,7 @@ class DataHubValidationAction(ValidationAction): def __init__( self, data_context: AbstractDataContext, - # this would capture `name` positional arg added in GX 0.18.0 + # this would capture `name` positional arg added in GX 0.18.14 *args: Union[str, Any], server_url: str, env: str = builder.DEFAULT_ENV, diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index c95d0e545c598..b5d0ed42e651e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -1822,16 +1822,21 @@ def _aggregate_owners( logger.debug( f"Owner after applying owner extraction pattern:'{self.config.owner_extraction_pattern}' is '{owner}'." 
) - if self.config.strip_user_ids_from_email: - owner = owner.split("@")[0] - logger.debug(f"Owner (after stripping email):{owner}") - - owner_list.append( - OwnerClass( - owner=mce_builder.make_user_urn(owner), - type=OwnershipTypeClass.DATAOWNER, + if isinstance(owner, list): + owners = owner + else: + owners = [owner] + for owner in owners: + if self.config.strip_user_ids_from_email: + owner = owner.split("@")[0] + logger.debug(f"Owner (after stripping email):{owner}") + + owner_list.append( + OwnerClass( + owner=mce_builder.make_user_urn(owner), + type=OwnershipTypeClass.DATAOWNER, + ) ) - ) owner_list = sorted(owner_list, key=lambda x: x.owner) return owner_list diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java index c40fa49173627..df902f95245ef 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java @@ -266,6 +266,7 @@ private static List computeDiffs( SchemaField renamedField = findRenamedField( curBaseField, + new HashSet<>(baseFields.subList(baseFieldIdx, baseFields.size())), targetFields.subList(targetFieldIdx, targetFields.size()), renamedFields); if (renamedField == null) { @@ -289,7 +290,10 @@ private static List computeDiffs( // minor version bump for both. 
SchemaField renamedField = findRenamedField( - curTargetField, baseFields.subList(baseFieldIdx, baseFields.size()), renamedFields); + curTargetField, + new HashSet<>(targetFields.subList(targetFieldIdx, targetFields.size())), + baseFields.subList(baseFieldIdx, baseFields.size()), + renamedFields); if (renamedField == null) { processAdd(changeCategories, changeEvents, datasetUrn, curTargetField, auditStamp); ++targetFieldIdx; @@ -348,10 +352,14 @@ private static void sortFieldsByPath(SchemaMetadata schemaMetadata) { } private static SchemaField findRenamedField( - SchemaField curField, List targetFields, Set renamedFields) { + SchemaField curField, + Set baseFields, + List targetFields, + Set renamedFields) { return targetFields.stream() .filter(schemaField -> isRenamed(curField, schemaField)) .filter(field -> !renamedFields.contains(field)) + .filter(field -> !baseFields.contains(field)) // Filter out fields that will match later .findFirst() .orElse(null); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGeneratorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGeneratorTest.java index 22dc3162c1e86..afa0730483108 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGeneratorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGeneratorTest.java @@ -153,6 +153,37 @@ public void testSchemaFieldRename() throws Exception { Set.of(SchemaFieldModificationCategory.RENAME.toString()), actual); } + @Test + public void testSchemaFieldRename2() throws Exception { + SchemaMetadataChangeEventGenerator test = new SchemaMetadataChangeEventGenerator(); + + Urn urn = getTestUrn(); + String entity = "dataset"; + String aspect = "schemaMetadata"; + AuditStamp auditStamp = getTestAuditStamp(); + + Aspect from = + getSchemaMetadata( + List.of( + 
new SchemaField().setFieldPath("id").setNativeDataType("VARCHAR"), + new SchemaField().setFieldPath("fullname").setNativeDataType("VARCHAR"), + new SchemaField().setFieldPath("LastName").setNativeDataType("VARCHAR"))); + Aspect to = + getSchemaMetadata( + List.of( + new SchemaField().setFieldPath("id").setNativeDataType("VARCHAR"), + new SchemaField().setFieldPath("fullname").setNativeDataType("VARCHAR"), + new SchemaField().setFieldPath("lastName").setNativeDataType("VARCHAR"))); + List actual = test.getChangeEvents(urn, entity, aspect, from, to, auditStamp); + compareDescriptions( + Set.of( + "A forwards & backwards compatible change due to renaming of the field 'LastName to lastName'."), + actual); + assertEquals(1, actual.size()); + compareModificationCategories( + Set.of(SchemaFieldModificationCategory.RENAME.toString()), actual); + } + @Test public void testSchemaFieldDropAdd() throws Exception { // When a rename cannot be detected, treated as drop -> add diff --git a/metadata-service/war/build.gradle b/metadata-service/war/build.gradle index 80265e8c632de..ab9019e470dbc 100644 --- a/metadata-service/war/build.gradle +++ b/metadata-service/war/build.gradle @@ -43,6 +43,10 @@ dependencies { implementation externalDependency.awsMskIamAuth testRuntimeOnly externalDependency.logbackClassic implementation externalDependency.charle + + testImplementation externalDependency.testng + testImplementation externalDependency.springBootTest + testRuntimeOnly externalDependency.logbackClassic } configurations.all{ exclude group: "com.charleskorn.kaml", module:"kaml" diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java index e47a2b4e278e4..69fb9df2b04db 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java @@ -40,7 +40,8 @@ 
"com.linkedin.gms.factory.plugins", "com.linkedin.gms.factory.change", "com.datahub.event.hook", - "com.linkedin.gms.factory.notifications" + "com.linkedin.gms.factory.notifications", + "com.linkedin.gms.factory.telemetry" }) @PropertySource(value = "classpath:/application.yaml", factory = YamlPropertySourceFactory.class) @Configuration diff --git a/metadata-service/war/src/test/java/com/linkedin/gms/SpringTest.java b/metadata-service/war/src/test/java/com/linkedin/gms/SpringTest.java new file mode 100644 index 0000000000000..27dabf1e33ddd --- /dev/null +++ b/metadata-service/war/src/test/java/com/linkedin/gms/SpringTest.java @@ -0,0 +1,32 @@ +package com.linkedin.gms; + +import static org.testng.AssertJUnit.assertNotNull; + +import com.linkedin.gms.factory.telemetry.DailyReport; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import io.ebean.Database; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.Test; + +@SpringBootTest( + webEnvironment = SpringBootTest.WebEnvironment.MOCK, + properties = {"telemetry.enabledServer=true"}) +@ContextConfiguration(classes = CommonApplicationConfig.class) +public class SpringTest extends AbstractTestNGSpringContextTests { + + // Mock Beans take precedence, we add these to avoid needing to configure data sources etc. while + // still testing prod config + @MockBean private Database database; + @MockBean private ConfigEntityRegistry configEntityRegistry; + @MockBean private EntityRegistry entityRegistry; + + @Test + public void testTelemetry() { + DailyReport dailyReport = this.applicationContext.getBean(DailyReport.class); + assertNotNull(dailyReport); + } +}