Skip to content

Commit

Permalink
GH-904 - Addressing comments raised on PR - #905
Browse files Browse the repository at this point in the history
  • Loading branch information
TejasRGitHub authored and trajopadhye committed Jan 29, 2024
1 parent b81a91d commit 2fd031b
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 15 deletions.
22 changes: 14 additions & 8 deletions backend/dataall/modules/dataset_sharing/aws/glue_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def database_exists(self):
self._client.get_database(CatalogId=self._account_id, Name=self._database)
return True
except ClientError:
log.info(f'Database {self._database} does not exist on account {self._account_id}...')
log.info(f'Database {self._database} does not exist on account {self._account_id}')
return False

def table_exists(self, table_name):
Expand Down Expand Up @@ -120,7 +120,7 @@ def delete_database(self):
account_id = self._account_id
database = self._database

log.info(f'Deleting database {account_id}://{database} ...')
log.info(f'Deleting database {account_id}://{database} ')
try:
if self.database_exists():
self._client.delete_database(CatalogId=account_id, Name=database)
Expand All @@ -133,7 +133,6 @@ def delete_database(self):
)
raise e

## Todo - Check if this is used anywhere
def remove_create_table_default_permissions(self):
"""
When upgrading to LF tables and database can still have Create Table Default Permissions turned on.
Expand Down Expand Up @@ -177,16 +176,20 @@ def remove_create_table_default_permissions(self):
def get_source_catalog(self):
""" Get the source catalog account details """
try:
log.info(f'Fetching source catalog details for database {self._database}...')
log.info(f'Fetching source catalog details for database {self._database}')
response = self._client.get_database(CatalogId=self._account_id, Name=self._database)
linked_database = response.get('Database', {}).get('TargetDatabase', {})
log.info(f'Fetched source catalog details for database {self._database} are: {linked_database}...')
log.info(f'Fetched source catalog details for database {self._database} are: {linked_database}')
if linked_database:
return Catalog(account_id=linked_database.get('CatalogId'),
database_name=linked_database.get('DatabaseName'),
region=linked_database.get('Region', self._region))

except self._client.exceptions.EntityNotFoundException as enoFnd:
log.exception(f'Could not fetch source catalog details for database {self._database} due to {enoFnd}')
raise enoFnd
except Exception as e:
log.exception(f'Could not fetch source catalog details for database {self._database} due to {e}')
log.exception(f'Error fetching source catalog details for database {self._database} due to {e}')
raise e
return None

Expand All @@ -197,14 +200,17 @@ def get_database_tags(self):
region = self._region

try:
log.info(f'Getting tags for database {database}...')
log.info(f'Getting tags for database {database}')
resource_arn = f'arn:aws:glue:{region}:{account_id}:database/{database}'
response = self._client.get_tags(ResourceArn=resource_arn)
tags = response['Tags']

log.info(f'Successfully retrieved tags: {tags}')

return tags
except self._client.exceptions.EntityNotFoundException as entNotFound:
log.exception(f'Could not get tags for database {database} due to {entNotFound}')
raise entNotFound
except Exception as e:
log.exception(f'Could not get tags for database {database} due to {e}')
log.exception(f'Error fetching tags for {database} due to {e}')
raise e
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool:
if processor:
log.info(f'Granting permissions to tables: {shared_tables}')
approved_tables_succeed = processor.process_approved_shares()
log.info(f'sharing tables succeeded = {approved_tables_succeed}')
log.info(f'Sharing tables succeeded = {approved_tables_succeed}')
else:
approved_tables_succeed = False

Expand Down Expand Up @@ -188,7 +188,7 @@ def revoke_share(cls, engine: Engine, share_uri: str):
existing_shared_items = existing_shared_folders or existing_shared_buckets
log.info(f'Still remaining S3 resources shared = {existing_shared_items}')
if not existing_shared_folders and revoked_folders:
log.info("Clean up S3 access points...")
log.info("Clean up S3 access points")
clean_up_folders = ProcessS3AccessPointShare.clean_up_share(
session,
dataset=dataset,
Expand Down Expand Up @@ -240,7 +240,7 @@ def revoke_share(cls, engine: Engine, share_uri: str):
)
log.info(f'Still remaining LF resources shared = {existing_shared_items}')
if not existing_shared_items and revoked_tables:
log.info("Clean up LF remaining resources...")
log.info("Clean up LF remaining resources")
clean_up_tables = processor.delete_shared_database()
log.info(f"Clean up LF successful = {clean_up_tables}")

Expand Down Expand Up @@ -292,7 +292,12 @@ def create_lf_processor(session,
target_environment,
env_group,
)

# Verify account ownership in case database is in another central catalog account
processor.verify_catalog_ownership()

return processor
# Todo - Add an exception in case GlueClient initialization fails ??
except Exception as e:
log.error(f"Error creating LF processor: {e}")
for table in shared_tables:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def __init__(
self.source_database_name = catalog.database_name if catalog else dataset.GlueDatabaseName
self.principals = self.get_share_principals()
self.shared_db_name = self.build_shared_db_name()
self.verify_catalog_ownership()

@abc.abstractmethod
def process_approved_shares(self) -> [str]:
Expand Down Expand Up @@ -540,11 +539,11 @@ def target_glue_client(self):

def verify_catalog_ownership(self):
if self.catalog_details is None:
logger.info(f'database {self.dataset.GlueDatabaseName} is not a resource link, no catalog information present')
logger.info(f'Database {self.dataset.GlueDatabaseName} is not a resource link, no catalog information present')
return

if self.catalog_details.account_id != self.source_environment.AwsAccountId:
logger.info(f'database {self.dataset.GlueDatabaseName} is a resource link '
logger.info(f'Database {self.dataset.GlueDatabaseName} is a resource link '
f'the source database {self.catalog_details.database_name} belongs to a catalog account {self.catalog_details.account_id}')
if SessionHelper.is_assumable_pivot_role(self.catalog_details.account_id):
self.validate_catalog_ownership_tag()
Expand All @@ -559,4 +558,4 @@ def validate_catalog_ownership_tag(self):
if tags.get('owner_account_id', '') == self.source_environment.AwsAccountId:
logger.info(f'owner_account_id tag exists and matches the source account id {self.source_environment.AwsAccountId}')
else:
raise Exception(f'owner_account_id tag does not exist or does not matches the source account id {self.source_environment.AwsAccountId}')
raise Exception(f'owner_account_id tag does not exist or does not match the source account id {self.source_environment.AwsAccountId}')

0 comments on commit 2fd031b

Please sign in to comment.