diff --git a/backend/dataall/modules/dataset_sharing/aws/glue_client.py b/backend/dataall/modules/dataset_sharing/aws/glue_client.py
index e897a23d7..caf7acde5 100644
--- a/backend/dataall/modules/dataset_sharing/aws/glue_client.py
+++ b/backend/dataall/modules/dataset_sharing/aws/glue_client.py
@@ -54,7 +54,7 @@ def database_exists(self):
             self._client.get_database(CatalogId=self._account_id, Name=self._database)
             return True
         except ClientError:
-            log.info(f'Database {self._database} does not exist on account {self._account_id}...')
+            log.info(f'Database {self._database} does not exist on account {self._account_id}')
             return False
 
     def table_exists(self, table_name):
@@ -120,7 +120,7 @@ def delete_database(self):
         account_id = self._account_id
         database = self._database
 
-        log.info(f'Deleting database {account_id}://{database} ...')
+        log.info(f'Deleting database {account_id}://{database} ')
         try:
             if self.database_exists():
                 self._client.delete_database(CatalogId=account_id, Name=database)
@@ -133,7 +133,6 @@ def delete_database(self):
             )
             raise e
 
-    ## Todo - Check if this is used anywhere
     def remove_create_table_default_permissions(self):
         """
         When upgrading to LF tables and database can still have Create Table Default Permissions turned on.
@@ -177,16 +176,20 @@ def remove_create_table_default_permissions(self):
 
     def get_source_catalog(self):
         """ Get the source catalog account details """
         try:
-            log.info(f'Fetching source catalog details for database {self._database}...')
+            log.info(f'Fetching source catalog details for database {self._database}')
             response = self._client.get_database(CatalogId=self._account_id, Name=self._database)
             linked_database = response.get('Database', {}).get('TargetDatabase', {})
-            log.info(f'Fetched source catalog details for database {self._database} are: {linked_database}...')
+            log.info(f'Fetched source catalog details for database {self._database} are: {linked_database}')
             if linked_database:
                 return Catalog(account_id=linked_database.get('CatalogId'),
                                database_name=linked_database.get('DatabaseName'),
                                region=linked_database.get('Region', self._region))
+
+        except self._client.exceptions.EntityNotFoundException as enoFnd:
+            log.exception(f'Could not fetch source catalog details for database {self._database} due to {enoFnd}')
+            raise enoFnd
         except Exception as e:
-            log.exception(f'Could not fetch source catalog details for database {self._database} due to {e}')
+            log.exception(f'Error fetching source catalog details for database {self._database} due to {e}')
             raise e
         return None
@@ -197,7 +200,7 @@ def get_database_tags(self):
         region = self._region
 
         try:
-            log.info(f'Getting tags for database {database}...')
+            log.info(f'Getting tags for database {database}')
             resource_arn = f'arn:aws:glue:{region}:{account_id}:database/{database}'
             response = self._client.get_tags(ResourceArn=resource_arn)
             tags = response['Tags']
@@ -205,6 +208,9 @@ def get_database_tags(self):
             log.info(f'Successfully retrieved tags: {tags}')
 
             return tags
+        except self._client.exceptions.EntityNotFoundException as entNotFound:
+            log.exception(f'Could not get tags for database {database} due to {entNotFound}')
+            raise entNotFound
         except Exception as e:
-            log.exception(f'Could not get tags for database {database} due to {e}')
+            log.exception(f'Error fetching tags for {database} due to {e}')
             raise e
diff --git a/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py
index b9e7442ae..a03eb8345 100644
--- a/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py
+++ b/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py
@@ -105,7 +105,7 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool:
             if processor:
                 log.info(f'Granting permissions to tables: {shared_tables}')
                 approved_tables_succeed = processor.process_approved_shares()
-                log.info(f'sharing tables succeeded = {approved_tables_succeed}')
+                log.info(f'Sharing tables succeeded = {approved_tables_succeed}')
             else:
                 approved_tables_succeed = False
 
@@ -188,7 +188,7 @@ def revoke_share(cls, engine: Engine, share_uri: str):
             existing_shared_items = existing_shared_folders or existing_shared_buckets
             log.info(f'Still remaining S3 resources shared = {existing_shared_items}')
             if not existing_shared_folders and revoked_folders:
-                log.info("Clean up S3 access points...")
+                log.info("Clean up S3 access points")
                 clean_up_folders = ProcessS3AccessPointShare.clean_up_share(
                     session,
                     dataset=dataset,
@@ -240,7 +240,7 @@ def revoke_share(cls, engine: Engine, share_uri: str):
             )
             log.info(f'Still remaining LF resources shared = {existing_shared_items}')
             if not existing_shared_items and revoked_tables:
-                log.info("Clean up LF remaining resources...")
+                log.info("Clean up LF remaining resources")
                 clean_up_tables = processor.delete_shared_database()
                 log.info(f"Clean up LF successful = {clean_up_tables}")
 
@@ -292,7 +292,12 @@ def create_lf_processor(session, target_environment, env_group,
             )
+
+            # Verify account ownership in case database is in another central catalog account
+            processor.verify_catalog_ownership()
+
             return processor
+            # Todo - Add an exception in case GlueClient initialization fails ??
         except Exception as e:
             log.error(f"Error creating LF processor: {e}")
             for table in shared_tables:
diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py
index b9e5a198f..c534510e6 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py
@@ -47,7 +47,6 @@ def __init__(
         self.source_database_name = catalog.database_name if catalog else dataset.GlueDatabaseName
         self.principals = self.get_share_principals()
         self.shared_db_name = self.build_shared_db_name()
-        self.verify_catalog_ownership()
 
     @abc.abstractmethod
     def process_approved_shares(self) -> [str]:
@@ -540,11 +539,11 @@ def target_glue_client(self):
 
     def verify_catalog_ownership(self):
         if self.catalog_details is None:
-            logger.info(f'database {self.dataset.GlueDatabaseName} is not a resource link, no catalog information present')
+            logger.info(f'Database {self.dataset.GlueDatabaseName} is not a resource link, no catalog information present')
             return
 
         if self.catalog_details.account_id != self.source_environment.AwsAccountId:
-            logger.info(f'database {self.dataset.GlueDatabaseName} is a resource link '
+            logger.info(f'Database {self.dataset.GlueDatabaseName} is a resource link '
                         f'the source database {self.catalog_details.database_name} belongs to a catalog account {self.catalog_details.account_id}')
             if SessionHelper.is_assumable_pivot_role(self.catalog_details.account_id):
                 self.validate_catalog_ownership_tag()
@@ -559,4 +558,4 @@ def validate_catalog_ownership_tag(self):
         if tags.get('owner_account_id', '') == self.source_environment.AwsAccountId:
             logger.info(f'owner_account_id tag exists and matches the source account id {self.source_environment.AwsAccountId}')
         else:
-            raise Exception(f'owner_account_id tag does not exist or does not matches the source account id {self.source_environment.AwsAccountId}')
+            raise Exception(f'owner_account_id tag does not exist or does not match the source account id {self.source_environment.AwsAccountId}')
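
For context, a minimal standalone sketch of the catalog-ownership check that verify_catalog_ownership() and validate_catalog_ownership_tag() perform after this change: resolve the resource link's TargetDatabase, and if it points at another (central catalog) account, require an owner_account_id tag on that database matching the source account. The helper names and the account/database values below are hypothetical, and in data.all the get_tags call would be made with a Glue client assuming the pivot role in the catalog account rather than the default client used here.

# Standalone sketch only; helper names and the __main__ values are hypothetical.
import boto3


def resolve_source_catalog(glue_client, account_id: str, database: str, region: str):
    """Return (catalog_account_id, catalog_database, catalog_region) if the database
    is a resource link, else None (mirrors GlueClient.get_source_catalog)."""
    response = glue_client.get_database(CatalogId=account_id, Name=database)
    linked = response.get('Database', {}).get('TargetDatabase', {})
    if not linked:
        return None
    return linked.get('CatalogId'), linked.get('DatabaseName'), linked.get('Region', region)


def check_owner_tag(catalog_glue_client, catalog_account_id: str, catalog_database: str,
                    catalog_region: str, source_account_id: str) -> None:
    """Raise unless the central-catalog database carries an owner_account_id tag equal
    to the source account (mirrors validate_catalog_ownership_tag)."""
    arn = f'arn:aws:glue:{catalog_region}:{catalog_account_id}:database/{catalog_database}'
    tags = catalog_glue_client.get_tags(ResourceArn=arn)['Tags']
    if tags.get('owner_account_id', '') != source_account_id:
        raise Exception(
            f'owner_account_id tag does not exist or does not match the source account id {source_account_id}'
        )


if __name__ == '__main__':
    # Hypothetical values for illustration.
    source_account, database, region = '111111111111', 'my_dataset_db', 'eu-west-1'
    glue = boto3.client('glue', region_name=region)
    catalog = resolve_source_catalog(glue, source_account, database, region)
    if catalog and catalog[0] != source_account:
        # The database is a resource link into another (central catalog) account.
        # data.all would assume the pivot role in that account before this call;
        # the default client is reused here only to keep the sketch short.
        check_owner_tag(glue, *catalog, source_account_id=source_account)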