From b6449d1cfa21dc54d89e7ab1bdee66a439127fc6 Mon Sep 17 00:00:00 2001 From: Tejas Rajopadhye <71188245+TejasRGitHub@users.noreply.github.com> Date: Mon, 19 Feb 2024 08:08:41 -0600 Subject: [PATCH 1/2] [Gh-1032] Feature flags for topics and confidentiality and custom confidentiality list (#1049) ### Feature or Bugfix - Feature ### Detail This features adds feature flags to enable / disable topics and confidentiality fields while creating a dataset. Moreover, it adds ways to provide custom confidentiality fields to show on the UI. Please note - For the custom confidentiality fields you will have to provide the mapping to the existing confidentiality levels ( i.e. Unclassified, Secret, Official ) in data.all . Please refer to this section about setting data.all for more details - https://data-dot-all.github.io/dataall/deploy-aws/ ### Testing 1. Tested on local dev setup ( All Config Switches , Custom confidentiality values and standard confidentiality values ) 2. Tested on AWS account ( All Config Switches and custom confidentiality / standard confidentiality values ) 3. Unit test ### Things to do - [ ] Readme document to let user know how to use this feature of custom_confidentiality_mapping **( TODO When PR is APPROVED )** ### How to use this feature. The config.json is already updated with the new configs , if you want to use the custom mapping , please add the following section in the config.json under the `modules \ datasets \ features ` ```json "custom_confidentiality_mapping" : { "Public" : "Unclassified", "Custom Confidentiality" : "Official", "Custom Confidential" : "Secret", "Another Confidentiality" : "Official" } ``` ### Relates - https://github.com/data-dot-all/dataall/issues/1032 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? No - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? No - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? No - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? No - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --------- Co-authored-by: trajopadhye --- .../datasets/api/dataset/input_types.py | 6 +- .../modules/datasets/api/dataset/resolvers.py | 3 +- .../modules/datasets/api/dataset/types.py | 2 +- .../modules/datasets/cdk/dataset_stack.py | 5 +- .../datasets/indexers/dataset_indexer.py | 4 +- .../services/dataset_column_service.py | 2 +- .../services/dataset_profiling_service.py | 2 +- .../services/dataset_table_service.py | 2 +- .../services/datasets_base_enums.py | 18 +++ config.json | 4 +- frontend/src/modules/Catalog/views/Catalog.js | 44 ++--- .../Datasets/components/DatasetGovernance.js | 56 ++++--- .../Datasets/views/DatasetCreateForm.js | 145 ++++++++++------- .../modules/Datasets/views/DatasetEditForm.js | 153 ++++++++++-------- .../Datasets/views/DatasetImportForm.js | 143 +++++++++------- frontend/src/modules/constants.js | 2 + tests/modules/datasets/conftest.py | 12 ++ 17 files changed, 362 insertions(+), 241 deletions(-) diff --git a/backend/dataall/modules/datasets/api/dataset/input_types.py b/backend/dataall/modules/datasets/api/dataset/input_types.py index 0d8ca5198..9b8d3db81 100644 --- a/backend/dataall/modules/datasets/api/dataset/input_types.py +++ b/backend/dataall/modules/datasets/api/dataset/input_types.py @@ -19,7 +19,7 @@ gql.Argument( name='businessOwnerDelegationEmails', type=gql.ArrayType(gql.String) ), - gql.Argument('confidentiality', gql.Ref('ConfidentialityClassification')), + gql.Argument('confidentiality', gql.String), gql.Argument(name='stewards', type=gql.String), gql.Argument(name='autoApprovalEnabled', type=gql.Boolean) ], @@ -36,7 +36,7 @@ gql.Argument('businessOwnerDelegationEmails', gql.ArrayType(gql.String)), gql.Argument('businessOwnerEmail', gql.String), gql.Argument('language', gql.Ref('Language')), - gql.Argument('confidentiality', gql.Ref('ConfidentialityClassification')), + gql.Argument('confidentiality', gql.String), gql.Argument(name='stewards', type=gql.String), gql.Argument('KmsAlias', gql.NonNullableType(gql.String)), gql.Argument(name='autoApprovalEnabled', type=gql.Boolean) @@ -103,7 +103,7 @@ gql.Argument( name='businessOwnerDelegationEmails', type=gql.ArrayType(gql.String) ), - gql.Argument('confidentiality', gql.Ref('ConfidentialityClassification')), + gql.Argument('confidentiality', gql.String), gql.Argument(name='stewards', type=gql.String), gql.Argument(name='autoApprovalEnabled', type=gql.Boolean) diff --git a/backend/dataall/modules/datasets/api/dataset/resolvers.py b/backend/dataall/modules/datasets/api/dataset/resolvers.py index 8f2abc864..d37d0fc58 100644 --- a/backend/dataall/modules/datasets/api/dataset/resolvers.py +++ b/backend/dataall/modules/datasets/api/dataset/resolvers.py @@ -9,7 +9,7 @@ from dataall.base.db.exceptions import RequiredParameter, InvalidInput from dataall.modules.dataset_sharing.db.share_object_models import ShareObject from dataall.modules.datasets_base.db.dataset_models import Dataset -from dataall.modules.datasets_base.services.datasets_base_enums import DatasetRole +from dataall.modules.datasets_base.services.datasets_base_enums import DatasetRole, ConfidentialityClassification from dataall.modules.datasets.services.dataset_service import DatasetService log = logging.getLogger(__name__) @@ -201,6 +201,7 @@ def validate_creation_request(data): raise RequiredParameter('group') if not data.get('label'): raise RequiredParameter('label') + ConfidentialityClassification.validate_confidentiality_level(data.get('confidentiality', '')) if len(data['label']) > 52: raise InvalidInput( 'Dataset name', data['label'], 'less than 52 characters' diff --git a/backend/dataall/modules/datasets/api/dataset/types.py b/backend/dataall/modules/datasets/api/dataset/types.py index 32b8dc482..de73afdf4 100644 --- a/backend/dataall/modules/datasets/api/dataset/types.py +++ b/backend/dataall/modules/datasets/api/dataset/types.py @@ -129,7 +129,7 @@ ), gql.Field(name='topics', type=gql.ArrayType(gql.Ref('Topic'))), gql.Field( - name='confidentiality', type=gql.Ref('ConfidentialityClassification') + name='confidentiality', type=gql.String ), gql.Field(name='language', type=gql.Ref('Language')), gql.Field( diff --git a/backend/dataall/modules/datasets/cdk/dataset_stack.py b/backend/dataall/modules/datasets/cdk/dataset_stack.py index 9aa08ff8e..16d38bdde 100644 --- a/backend/dataall/modules/datasets/cdk/dataset_stack.py +++ b/backend/dataall/modules/datasets/cdk/dataset_stack.py @@ -25,6 +25,8 @@ from dataall.modules.datasets.aws.lf_dataset_client import LakeFormationDatasetClient from dataall.modules.datasets_base.db.dataset_models import Dataset from dataall.base.utils.cdk_nag_utils import CDKNagUtil +from dataall.base.config import config + logger = logging.getLogger(__name__) @@ -535,7 +537,8 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): ) trigger.node.add_dependency(job) - Tags.of(self).add('Classification', dataset.confidentiality) + if config.get_property('modules.datasets.features.confidentiality_dropdown', False): + Tags.of(self).add('Classification', dataset.confidentiality) TagsUtil.add_tags(stack=self, model=Dataset, target_type="dataset") diff --git a/backend/dataall/modules/datasets/indexers/dataset_indexer.py b/backend/dataall/modules/datasets/indexers/dataset_indexer.py index bf5f15abf..672dcdfc4 100644 --- a/backend/dataall/modules/datasets/indexers/dataset_indexer.py +++ b/backend/dataall/modules/datasets/indexers/dataset_indexer.py @@ -1,4 +1,6 @@ """Indexes Datasets in OpenSearch""" +import re + from dataall.core.environment.services.environment_service import EnvironmentService from dataall.core.organizations.db.organization_repositories import OrganizationRepository from dataall.modules.vote.db.vote_repositories import VoteRepository @@ -34,7 +36,7 @@ def upsert(cls, session, dataset_uri: str): 'source': dataset.S3BucketName, 'resourceKind': 'dataset', 'description': dataset.description, - 'classification': dataset.confidentiality, + 'classification': re.sub('[^A-Za-z0-9]+', '', dataset.confidentiality), 'tags': [t.replace('-', '') for t in dataset.tags or []], 'topics': dataset.topics, 'region': dataset.region.replace('-', ''), diff --git a/backend/dataall/modules/datasets/services/dataset_column_service.py b/backend/dataall/modules/datasets/services/dataset_column_service.py index 34d097035..a600ea649 100644 --- a/backend/dataall/modules/datasets/services/dataset_column_service.py +++ b/backend/dataall/modules/datasets/services/dataset_column_service.py @@ -33,7 +33,7 @@ def paginate_active_columns_for_table(uri: str, filter=None): table: DatasetTable = DatasetTableRepository.get_dataset_table_by_uri(session, uri) dataset = DatasetRepository.get_dataset_by_uri(session, table.datasetUri) if ( - dataset.confidentiality != ConfidentialityClassification.Unclassified.value + ConfidentialityClassification.get_confidentiality_level(dataset.confidentiality) != ConfidentialityClassification.Unclassified.value ): ResourcePolicy.check_user_resource_permission( session=session, diff --git a/backend/dataall/modules/datasets/services/dataset_profiling_service.py b/backend/dataall/modules/datasets/services/dataset_profiling_service.py index 5c6eccff1..d0ad83bdd 100644 --- a/backend/dataall/modules/datasets/services/dataset_profiling_service.py +++ b/backend/dataall/modules/datasets/services/dataset_profiling_service.py @@ -110,7 +110,7 @@ def _check_preview_permissions_if_needed(session, table_uri): session, table_uri ) dataset = DatasetRepository.get_dataset_by_uri(session, table.datasetUri) - if dataset.confidentiality != ConfidentialityClassification.Unclassified.value: + if ConfidentialityClassification.get_confidentiality_level(dataset.confidentiality) != ConfidentialityClassification.Unclassified.value: ResourcePolicy.check_user_resource_permission( session=session, username=context.username, diff --git a/backend/dataall/modules/datasets/services/dataset_table_service.py b/backend/dataall/modules/datasets/services/dataset_table_service.py index b226d81c3..42338c6b2 100644 --- a/backend/dataall/modules/datasets/services/dataset_table_service.py +++ b/backend/dataall/modules/datasets/services/dataset_table_service.py @@ -85,7 +85,7 @@ def preview(table_uri: str): ) dataset = DatasetRepository.get_dataset_by_uri(session, table.datasetUri) if ( - dataset.confidentiality != ConfidentialityClassification.Unclassified.value + ConfidentialityClassification.get_confidentiality_level(dataset.confidentiality) != ConfidentialityClassification.Unclassified.value ): ResourcePolicy.check_user_resource_permission( session=session, diff --git a/backend/dataall/modules/datasets_base/services/datasets_base_enums.py b/backend/dataall/modules/datasets_base/services/datasets_base_enums.py index 523ec3906..b5b80c17a 100644 --- a/backend/dataall/modules/datasets_base/services/datasets_base_enums.py +++ b/backend/dataall/modules/datasets_base/services/datasets_base_enums.py @@ -1,4 +1,8 @@ from dataall.base.api.constants import GraphQLEnumMapper +from dataall.base.config import config +from dataall.base.db.exceptions import InvalidInput + +custom_confidentiality_mapping = config.get_property('modules.datasets.features.custom_confidentiality_mapping', {}) class DatasetRole(GraphQLEnumMapper): @@ -22,6 +26,20 @@ class ConfidentialityClassification(GraphQLEnumMapper): Official = 'Official' Secret = 'Secret' + @staticmethod + def get_confidentiality_level(confidentiality): + return confidentiality if not custom_confidentiality_mapping else custom_confidentiality_mapping.get( + confidentiality, None) + + @staticmethod + def validate_confidentiality_level(confidentiality): + if config.get_property('modules.datasets.features.confidentiality_dropdown', False): + confidentiality = ConfidentialityClassification.get_confidentiality_level(confidentiality) + if confidentiality not in [item.value for item in list(ConfidentialityClassification)]: + raise InvalidInput('Confidentiality Name', confidentiality, + 'does not conform to the confidentiality classification. Hint: Check your confidentiality value OR check your mapping if you are using custom confidentiality values') + return True + class Language(GraphQLEnumMapper): English = 'English' diff --git a/config.json b/config.json index 75baa56b4..a79a17d0f 100644 --- a/config.json +++ b/config.json @@ -24,7 +24,9 @@ } }, "preview_data": true, - "glue_crawler": true + "glue_crawler": true, + "confidentiality_dropdown" : true, + "topics_dropdown" : true } }, "worksheets": { diff --git a/frontend/src/modules/Catalog/views/Catalog.js b/frontend/src/modules/Catalog/views/Catalog.js index c52c3b42f..b9664c004 100644 --- a/frontend/src/modules/Catalog/views/Catalog.js +++ b/frontend/src/modules/Catalog/views/Catalog.js @@ -32,6 +32,7 @@ import { useSettings } from 'design'; import { GlossarySearchWrapper, GlossarySearchResultItem } from '../components'; +import config from '../../../generated/config.json'; const useStyles = makeStyles((theme) => ({ mainSearch: { @@ -171,7 +172,14 @@ const Catalog = () => { const classes = useStyles(); const anchorRef = useRef(null); const [openMenu, setOpenMenu] = useState(false); - const [filterItems] = useState([ + const dataFieldList = ['label', 'name', 'description', 'region', 'tags']; + + if (config.modules.datasets.features.topics_dropdown === true) + dataFieldList.push('topics'); + if (config.modules.datasets.features.confidentiality_dropdown === true) + dataFieldList.push('classification'); + + const filterItemsInit = [ { title: 'Type', dataField: 'resourceKind', @@ -184,25 +192,30 @@ const Catalog = () => { componentId: 'TagSensor', filterLabel: 'Tags' }, - { - title: 'Topics', - dataField: 'topics', - componentId: 'TopicSensor', - filterLabel: 'Topics' - }, { title: 'Region', dataField: 'region', componentId: 'RegionSensor', filterLabel: 'Region' - }, - { + } + ]; + + if (config.modules.datasets.features.topics_dropdown === true) + filterItemsInit.push({ + title: 'Topics', + dataField: 'topics', + componentId: 'TopicSensor', + filterLabel: 'Topics' + }); + if (config.modules.datasets.features.confidentiality_dropdown === true) + filterItemsInit.push({ title: 'Classification', dataField: 'classification', componentId: 'ClassificationSensor', filterLabel: 'Classification' - } - ]); + }); + + const [filterItems] = useState(filterItemsInit); const [listClass, setListClass] = useState( settings.theme === THEMES.LIGHT ? classes.lightListSearch @@ -337,14 +350,7 @@ const Catalog = () => { fuzziness="AUTO" componentId="SearchSensor" filterLabel="text" - dataField={[ - 'label', - 'name', - 'description', - 'region', - 'topics', - 'tags' - ]} + dataField={dataFieldList} placeholder="Search" /> diff --git a/frontend/src/modules/Datasets/components/DatasetGovernance.js b/frontend/src/modules/Datasets/components/DatasetGovernance.js index f41b27ea2..f4b1ea6cb 100644 --- a/frontend/src/modules/Datasets/components/DatasetGovernance.js +++ b/frontend/src/modules/Datasets/components/DatasetGovernance.js @@ -9,6 +9,7 @@ import { } from '@mui/material'; import PropTypes from 'prop-types'; import { Label } from 'design'; +import { isFeatureEnabled } from 'utils'; export const DatasetGovernance = (props) => { const { dataset } = props; @@ -48,31 +49,36 @@ export const DatasetGovernance = (props) => { - - - Classification - - - - - - - - Topics - - - {dataset.topics && - dataset.topics.length > 0 && - dataset.topics.map((t) => ( - - ))} - - + {isFeatureEnabled('datasets', 'confidentiality_dropdown') && ( + + + Classification + + + + + + )} + {isFeatureEnabled('datasets', 'topics_dropdown') && ( + + + Topics + + + {dataset.topics && + dataset.topics.length > 0 && + dataset.topics.map((t) => ( + + ))} + + + )} + Tags diff --git a/frontend/src/modules/Datasets/views/DatasetCreateForm.js b/frontend/src/modules/Datasets/views/DatasetCreateForm.js index 3d4ab89f8..ebb2a41bb 100644 --- a/frontend/src/modules/Datasets/views/DatasetCreateForm.js +++ b/frontend/src/modules/Datasets/views/DatasetCreateForm.js @@ -37,7 +37,9 @@ import { listEnvironmentGroups, listValidEnvironments } from 'services'; -import { Topics } from '../../constants'; +import { Topics, ConfidentialityList } from '../../constants'; +import config from '../../../generated/config.json'; +import { isFeatureEnabled } from 'utils'; const DatasetCreateForm = (props) => { const dispatch = useDispatch(); @@ -48,11 +50,14 @@ const DatasetCreateForm = (props) => { const [loading, setLoading] = useState(true); const [groupOptions, setGroupOptions] = useState([]); const [environmentOptions, setEnvironmentOptions] = useState([]); - const [confidentialityOptions] = useState([ - 'Unclassified', - 'Official', - 'Secret' - ]); + const [confidentialityOptions] = useState( + config.modules.datasets.features.confidentiality_dropdown === true && + config.modules.datasets.features.custom_confidentiality_mapping + ? Object.keys( + config.modules.datasets.features.custom_confidentiality_mapping + ) + : ConfidentialityList + ); const topicsData = Topics.map((t) => ({ label: t, value: t })); @@ -226,13 +231,20 @@ const DatasetCreateForm = (props) => { SamlGroupName: Yup.string() .max(255) .required('*Owners team is required'), - topics: Yup.array().min(1).required('*Topics are required'), + topics: isFeatureEnabled('datasets', 'topics_dropdown') + ? Yup.array().min(1).required('*Topics are required') + : Yup.array(), environment: Yup.object().required('*Environment is required'), tags: Yup.array().min(1).required('*Tags are required'), stewards: Yup.string().max(255).nullable(), - confidentiality: Yup.string() - .max(255) - .required('*Confidentiality is required'), + confidentiality: isFeatureEnabled( + 'datasets', + 'confidentiality_dropdown' + ) + ? Yup.string() + .max(255) + .required('*Confidentiality is required') + : Yup.string(), autoApprovalEnabled: Yup.boolean().required( '*AutoApproval property is required' ) @@ -304,59 +316,70 @@ const DatasetCreateForm = (props) => { )} - + - - - {confidentialityOptions.map((c) => ( - - {c} - - ))} - - - - opt.label} - onChange={(event, value) => { - setFieldValue('topics', value); - }} - renderTags={(tagValue, getTagProps) => - tagValue.map((option, index) => ( - + + {confidentialityOptions.map((c) => ( + + {c} + + ))} + + + )} + {isFeatureEnabled('datasets', 'topics_dropdown') && ( + + opt.label} + onChange={(event, value) => { + setFieldValue('topics', value); + }} + renderTags={(tagValue, getTagProps) => + tagValue.map((option, index) => ( + + )) + } + renderInput={(p) => ( + - )) - } - renderInput={(p) => ( - - )} - /> - + )} + /> + + )} { const dispatch = useDispatch(); @@ -52,11 +54,14 @@ const DatasetEditForm = (props) => { const [groupOptions, setGroupOptions] = useState([]); const [selectableTerms, setSelectableTerms] = useState([]); const [tableTerms, setTableTerms] = useState([]); - const [confidentialityOptions] = useState([ - 'Unclassified', - 'Official', - 'Secret' - ]); + const [confidentialityOptions] = useState( + config.modules.datasets.features.confidentiality_dropdown === true && + config.modules.datasets.features.custom_confidentiality_mapping + ? Object.keys( + config.modules.datasets.features.custom_confidentiality_mapping + ) + : ConfidentialityList + ); const topicsData = Topics.map((t) => ({ label: t, value: t })); @@ -271,11 +276,18 @@ const DatasetEditForm = (props) => { .required('*Dataset name is required'), description: Yup.string().max(5000), KmsAlias: Yup.string().max(255), - topics: Yup.array().min(1).required('*Topics are required'), + topics: isFeatureEnabled('datasets', 'topics_dropdown') + ? Yup.array().min(1).required('*Topics are required') + : Yup.array(), tags: Yup.array().min(1).required('*Tags are required'), - confidentiality: Yup.string().required( - '*Confidentiality is required' - ), + confidentiality: isFeatureEnabled( + 'datasets', + 'confidentiality_dropdown' + ) + ? Yup.string() + .max(255) + .required('*Confidentiality is required') + : Yup.string(), autoApprovalEnabled: Yup.boolean().required( '*AutoApproval property is required' ) @@ -350,62 +362,73 @@ const DatasetEditForm = (props) => { - - - {confidentialityOptions.map((c) => ( - - {c} - - ))} - - - - - option.value === value.value - } - getOptionLabel={(opt) => opt.label} - onChange={(event, value) => { - setFieldValue('topics', value); - }} - renderTags={(tagValue, getTagProps) => - tagValue.map((option, index) => ( - + + {confidentialityOptions.map((c) => ( + + {c} + + ))} + + + )} + {isFeatureEnabled('datasets', 'topics_dropdown') && ( + + + option.value === value.value + } + getOptionLabel={(opt) => opt.label} + onChange={(event, value) => { + setFieldValue('topics', value); + }} + renderTags={(tagValue, getTagProps) => + tagValue.map((option, index) => ( + + )) + } + renderInput={(p) => ( + - )) - } - renderInput={(p) => ( - - )} - /> - + )} + /> + + )} {dataset && ( { const dispatch = useDispatch(); @@ -48,11 +50,14 @@ const DatasetImportForm = (props) => { const [loading, setLoading] = useState(true); const [groupOptions, setGroupOptions] = useState([]); const [environmentOptions, setEnvironmentOptions] = useState([]); - const [confidentialityOptions] = useState([ - 'Unclassified', - 'Official', - 'Secret' - ]); + const [confidentialityOptions] = useState( + config.modules.datasets.features.confidentiality_dropdown === true && + config.modules.datasets.features.custom_confidentiality_mapping + ? Object.keys( + config.modules.datasets.features.custom_confidentiality_mapping + ) + : ConfidentialityList + ); const topicsData = Topics.map((t) => ({ label: t, value: t })); @@ -234,7 +239,9 @@ const DatasetImportForm = (props) => { SamlGroupName: Yup.string() .max(255) .required('*Team is required'), - topics: Yup.array().min(1).required('*Topics are required'), + topics: isFeatureEnabled('datasets', 'topics_dropdown') + ? Yup.array().min(1).required('*Topics are required') + : Yup.array(), environment: Yup.object().required('*Environment is required'), tags: Yup.array().min(1).required('*Tags are required'), glueDatabaseName: Yup.string().max(255), @@ -242,9 +249,14 @@ const DatasetImportForm = (props) => { bucketName: Yup.string() .max(255) .required('*S3 bucket name is required'), - confidentiality: Yup.string() - .max(255) - .required('*Confidentiality is required'), + confidentiality: isFeatureEnabled( + 'datasets', + 'confidentiality_dropdown' + ) + ? Yup.string() + .max(255) + .required('*Confidentiality is required') + : Yup.string(), autoApprovalEnabled: Yup.boolean().required( '*AutoApproval property is required' ) @@ -319,57 +331,68 @@ const DatasetImportForm = (props) => { - - - {confidentialityOptions.map((c) => ( - - {c} - - ))} - - - - opt.label} - onChange={(event, value) => { - setFieldValue('topics', value); - }} - renderTags={(tagValue, getTagProps) => - tagValue.map((option, index) => ( - + + {confidentialityOptions.map((c) => ( + + {c} + + ))} + + + )} + {isFeatureEnabled('datasets', 'topics_dropdown') && ( + + opt.label} + onChange={(event, value) => { + setFieldValue('topics', value); + }} + renderTags={(tagValue, getTagProps) => + tagValue.map((option, index) => ( + + )) + } + renderInput={(p) => ( + - )) - } - renderInput={(p) => ( - - )} - /> - + )} + /> + + )} Date: Mon, 19 Feb 2024 15:43:44 +0100 Subject: [PATCH 2/2] Fix: add sharing guardrails drop permissions (#1055) ### Feature or Bugfix - Feature - Bugfix ### Detail This PR should be tested and reviewed after #1016 is merged. data.all pivot role is a Data Lake admin in Lake Formation. However, to drop tables and databases "DROP" permissions are required, even for Data Lake Admins. In order for the revoke processes to work correctly, we need to ensure that these permissions are granted to the pivot role for data sharing glue resources (shared database and resource link tables) in all situations. One of this scenarios is the migration from manual to auto-created pivot roles and vice-versa as reported in #1053 . Other cases involve manual actions on existing resource links. To avoid any potential issue and make the revoke more robust, this PR explicitly grants DROP permissions on tables before deleting them. And grants "ALL" permissions on the shared_db database. Tested locally: - Create 2 environments with manually created pivot role and create, submit and approve a share request with tables. - [X] Change the configuration to use auto-created pivot roles and revoke the share. The tables are revoked successfully ### Relates - #1053 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../share_managers/lf_share_manager.py | 15 +++++++++++ .../lakeformation_process_share.py | 2 ++ .../datasets/tasks/test_lf_share_manager.py | 25 +++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py index 94e629238..b3f2eab95 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py @@ -202,6 +202,21 @@ def grant_pivot_role_all_database_permissions_to_shared_database(self) -> True: ) return True + def grant_pivot_role_drop_permissions_to_resource_link_table(self, table: DatasetTable) -> True: + """ + Grants 'DROP' Lake Formation permissions to pivot role to the resource link table in target account + :param table: DatasetTable + :return: True if it is successful + """ + self.lf_client_in_target.grant_permissions_to_table( + principals=[SessionHelper.get_delegation_role_arn(self.target_environment.AwsAccountId)], + database_name=self.shared_db_name, + table_name=table.GlueTableName, + catalog_id=self.target_environment.AwsAccountId, + permissions=['DROP'] + ) + return True + def grant_principals_database_permissions_to_shared_database(self) -> True: """ Grants 'DESCRIBE' Lake Formation permissions to share principals to the shared database in target account diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/lakeformation_process_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/lakeformation_process_share.py index 1406e6eaf..f60a57e02 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_processors/lakeformation_process_share.py +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/lakeformation_process_share.py @@ -155,6 +155,7 @@ def process_revoked_shares(self) -> bool: '##### Starting Revoking tables #######' ) success = True + self.grant_pivot_role_all_database_permissions_to_shared_database() for table in self.revoked_tables: share_item = ShareObjectRepository.find_sharable_item( self.session, self.share.shareUri, table.tableUri @@ -182,6 +183,7 @@ def process_revoked_shares(self) -> bool: if (self.is_new_share and not other_table_shares_in_env) or not self.is_new_share: warn('self.is_new_share will be deprecated in v2.6.0', DeprecationWarning, stacklevel=2) + self.grant_pivot_role_drop_permissions_to_resource_link_table(table) self.delete_resource_link_table_in_shared_database(table) if not other_table_shares_in_env: diff --git a/tests/modules/datasets/tasks/test_lf_share_manager.py b/tests/modules/datasets/tasks/test_lf_share_manager.py index 54a39ca85..cb7c48de6 100644 --- a/tests/modules/datasets/tasks/test_lf_share_manager.py +++ b/tests/modules/datasets/tasks/test_lf_share_manager.py @@ -445,6 +445,31 @@ def test_grant_principals_permissions_to_resource_link_table( permissions=['DESCRIBE'] ) + +def test_grant_pivot_role_drop_permissions_to_resource_link_table( + processor_with_mocks, + table1: DatasetTable, + target_environment: Environment, + mocker +): + processor, lf_client, glue_client = processor_with_mocks + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_delegation_role_arn", + return_value="arn:role", + ) + # When + processor.grant_pivot_role_drop_permissions_to_resource_link_table(table1) + # Then + lf_client.grant_permissions_to_table.assert_called_once() + lf_client.grant_permissions_to_table.assert_called_with( + principals=["arn:role"], + database_name=processor.shared_db_name, + table_name=table1.GlueTableName, + catalog_id=target_environment.AwsAccountId, + permissions=['DROP'] + ) + + def test_grant_principals_permissions_to_table_in_target( processor_with_mocks, table1: DatasetTable,