From 179fbbbb527e9cbf8e4cf40d852ab910990d3bf7 Mon Sep 17 00:00:00 2001 From: dlpzx <71252798+dlpzx@users.noreply.github.com> Date: Tue, 27 Feb 2024 18:24:03 +0100 Subject: [PATCH] Update Worksheet database names in UI for new simplified db names (#1063) ### Feature or Bugfix - Bugfix ### Detail After implementing #1016 the names displayed for the databases in Worksheets won't contain the unique identifier. In addition this PR solves https://github.com/data-dot-all/dataall/issues/805 by removing duplicates also in FE. Here is an screenshot of a local test: ![Screenshot 2024-02-14 at 17 34 16](https://github.com/data-dot-all/dataall/assets/71252798/a3756f7e-4284-4420-8d48-a33a6340120d) Update: Because there will be a mix of old shares with Glue database names ending in`shared_URI` and shares with database names suffixed by `shared` only, this PR introduces a new field in the GraphQL type returned by the searchDataItems query. This field resolves the name of the shared Glue database. @noah-paige @TejasRGitHub in this [commit](https://github.com/data-dot-all/dataall/pull/1063/commits/3321109705976b66fe33fd9645c9e2bf5140b546) At first I tried implementing a separate resolver for the Worksheets, but I think we can fix step by step and first focus on the database name and then on the group of the Worksheet vs the group chosen inside the Worksheet. In any case I left the [commit ](https://github.com/data-dot-all/dataall/pull/1063/commits/82e4ed3f83f6138f5b605b9e20faa1b5e0034e72)to have some reference. ### Relates - #1016 - #805 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../modules/dataset_sharing/api/resolvers.py | 21 ++++++++++++++++++- .../modules/dataset_sharing/api/types.py | 7 ++++++- .../services/share_object_service.py | 8 +++++++ .../modules/Worksheets/views/WorksheetView.js | 17 ++++++++++----- .../Environment/searchEnvironmentDataItems.js | 1 + 5 files changed, 47 insertions(+), 7 deletions(-) diff --git a/backend/dataall/modules/dataset_sharing/api/resolvers.py b/backend/dataall/modules/dataset_sharing/api/resolvers.py index e8efb52c4..667d85f43 100644 --- a/backend/dataall/modules/dataset_sharing/api/resolvers.py +++ b/backend/dataall/modules/dataset_sharing/api/resolvers.py @@ -1,6 +1,5 @@ import logging -from dataall.base import utils from dataall.base.api.context import Context from dataall.core.environment.db.environment_models import Environment from dataall.core.environment.services.environment_service import EnvironmentService @@ -10,6 +9,7 @@ from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject from dataall.modules.dataset_sharing.services.share_item_service import ShareItemService from dataall.modules.dataset_sharing.services.share_object_service import ShareObjectService +from dataall.modules.dataset_sharing.aws.glue_client import GlueClient from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset @@ -218,6 +218,25 @@ def list_shareable_objects( ) +def resolve_shared_database_name( + context: Context, source +): + if not source: + return None + old_shared_db_name = (source.GlueDatabaseName + '_shared_' + source.shareUri)[:254] + with context.engine.scoped_session() as session: + share = ShareObjectService.get_share_object_in_environment(uri=source.environmentUri, shareUri=source.shareUri) + env = EnvironmentService.get_environment_by_uri(session, share.environmentUri) + database = GlueClient( + account_id=env.AwsAccountId, + database=old_shared_db_name, + region=env.region + ).get_glue_database() + if database: + return old_shared_db_name + return source.GlueDatabaseName + '_shared' + + def list_shares_in_my_inbox(context: Context, source, filter: dict = None): if not filter: filter = {} diff --git a/backend/dataall/modules/dataset_sharing/api/types.py b/backend/dataall/modules/dataset_sharing/api/types.py index ed954962a..9956eb7e7 100644 --- a/backend/dataall/modules/dataset_sharing/api/types.py +++ b/backend/dataall/modules/dataset_sharing/api/types.py @@ -2,7 +2,7 @@ from dataall.modules.dataset_sharing.services.dataset_sharing_enums import ShareableType, PrincipalType from dataall.modules.dataset_sharing.api.resolvers import union_resolver, resolve_shared_item, resolve_dataset, \ resolve_consumption_data, resolve_existing_shared_items, resolve_share_object_statistics, resolve_principal, \ - resolve_group, list_shareable_objects, resolve_user_role + resolve_group, list_shareable_objects, resolve_user_role, resolve_shared_database_name from dataall.core.environment.api.resolvers import resolve_environment ShareableObject = gql.Union( @@ -192,6 +192,11 @@ gql.Field(name='GlueDatabaseName', type=gql.String), gql.Field(name='GlueTableName', type=gql.String), gql.Field(name='S3AccessPointName', type=gql.String), + gql.Field( + 'sharedGlueDatabaseName', + type=gql.String, + resolver=resolve_shared_database_name, + ), ], ) diff --git a/backend/dataall/modules/dataset_sharing/services/share_object_service.py b/backend/dataall/modules/dataset_sharing/services/share_object_service.py index cacef7a1a..da4299da7 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_object_service.py +++ b/backend/dataall/modules/dataset_sharing/services/share_object_service.py @@ -6,6 +6,7 @@ from dataall.core.environment.services.environment_service import EnvironmentService from dataall.core.permissions.db.resource_policy_repositories import ResourcePolicy from dataall.core.permissions.permission_checker import has_resource_permission +from dataall.core.permissions.permissions import GET_ENVIRONMENT from dataall.core.tasks.db.task_models import Task from dataall.base.db import utils from dataall.base.utils.naming_convention import NamingConventionPattern @@ -27,6 +28,13 @@ class ShareObjectService: + + @staticmethod + @has_resource_permission(GET_ENVIRONMENT) + def get_share_object_in_environment(uri, shareUri): + with get_context().db_engine.scoped_session() as session: + return ShareObjectRepository.get_share_by_uri(session, shareUri) + @staticmethod @has_resource_permission(GET_SHARE_OBJECT) def get_share_object(uri): diff --git a/frontend/src/modules/Worksheets/views/WorksheetView.js b/frontend/src/modules/Worksheets/views/WorksheetView.js index ee64eb852..3ac51c0e9 100644 --- a/frontend/src/modules/Worksheets/views/WorksheetView.js +++ b/frontend/src/modules/Worksheets/views/WorksheetView.js @@ -181,11 +181,18 @@ const WorksheetView = () => { response.data.searchEnvironmentDataItems.nodes.map((d) => ({ datasetUri: d.datasetUri, value: d.datasetUri, - label: `${d.GlueDatabaseName}_shared_${d.shareUri}`, - GlueDatabaseName: - `${d.GlueDatabaseName}_shared_${d.shareUri}`.substring(0, 254), + label: d.sharedGlueDatabaseName, + GlueDatabaseName: d.sharedGlueDatabaseName, environmentUri: d.environmentUri })); + // Remove duplicates based on GlueDatabaseName + sharedWithDatabases = sharedWithDatabases.filter( + (database, index, self) => + index === + self.findIndex( + (d) => d.GlueDatabaseName === database.GlueDatabaseName + ) + ); } setDatabaseOptions(ownedDatabases.concat(sharedWithDatabases)); setLoadingDatabases(false); @@ -196,7 +203,7 @@ const WorksheetView = () => { async (environment, dataset) => { setLoadingTables(true); let response = ''; - if (dataset.GlueDatabaseName.includes(dataset.datasetUri + '_shared_')) { + if (dataset.GlueDatabaseName.includes(dataset.datasetUri + '_shared')) { response = await client.query( getSharedDatasetTables({ datasetUri: dataset.datasetUri, @@ -214,7 +221,7 @@ const WorksheetView = () => { if ( !response.errors && - dataset.GlueDatabaseName.includes(dataset.datasetUri + '_shared_') + dataset.GlueDatabaseName.includes(dataset.datasetUri + '_shared') ) { setTableOptions( response.data.getSharedDatasetTables.map((t) => ({ diff --git a/frontend/src/services/graphql/Environment/searchEnvironmentDataItems.js b/frontend/src/services/graphql/Environment/searchEnvironmentDataItems.js index ac53759ba..ce7ea47a0 100644 --- a/frontend/src/services/graphql/Environment/searchEnvironmentDataItems.js +++ b/frontend/src/services/graphql/Environment/searchEnvironmentDataItems.js @@ -34,6 +34,7 @@ export const searchEnvironmentDataItems = ({ filter, environmentUri }) => ({ S3AccessPointName created principalId + sharedGlueDatabaseName } } }