Skip to content

Commit

Permalink
Updated glue crawler security config
Browse files Browse the repository at this point in the history
  • Loading branch information
Mourya Darivemula committed Jul 26, 2024
1 parent 1cec5fe commit 796b531
Showing 1 changed file with 52 additions and 0 deletions.
52 changes: 52 additions & 0 deletions backend/dataall/modules/s3_datasets/cdk/dataset_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
Duration,
CfnResource,
CustomResource,
RemovalPolicy,
Tags,
)
from aws_cdk.aws_glue import CfnCrawler
Expand Down Expand Up @@ -292,6 +293,14 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
f'arn:aws:glue:{dataset.region}:{dataset.AwsAccountId}:crawler/{dataset.GlueCrawlerName}',
],
),
iam.PolicyStatement(
sid='GlueSecurityConfiguration',
actions=[
'glue:GetSecurityConfiguration',
],
effect=iam.Effect.ALLOW,
resources=['*'],
),
iam.PolicyStatement(
sid='GlueAccessDefault',
actions=[
Expand Down Expand Up @@ -322,6 +331,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
effect=iam.Effect.ALLOW,
resources=[
f'arn:aws:logs:{dataset.region}:{dataset.AwsAccountId}:log-group:/aws-glue/crawlers:log-stream:{dataset.GlueCrawlerName}',
f'arn:aws:logs:{dataset.region}:{dataset.AwsAccountId}:log-group:/aws-glue/crawlers-role/{dataset.GlueCrawlerName}*:log-stream:{dataset.GlueCrawlerName}*',
f'arn:aws:logs:{dataset.region}:{dataset.AwsAccountId}:log-group:/aws-glue/jobs/*',
],
),
Expand Down Expand Up @@ -437,20 +447,61 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
},
)

glue_sec_conf_enc_key = kms.Key(
self,
f'crwlr_log_enc_key_{dataset.GlueCrawlerName}',
removal_policy=RemovalPolicy.DESTROY,
alias=f'crwlr_log_enc_key_{dataset.GlueCrawlerName}',
enable_key_rotation=True,
)

glue_sec_conf_enc_key.add_to_resource_policy(
iam.PolicyStatement(
sid='EnableCrawlerIAMRoleKeyUsage',
resources=['*'],
effect=iam.Effect.ALLOW,
principals=[dataset_admin_role],
actions=['kms:Encrypt', 'kms:Decrypt', 'kms:ReEncrypt*', 'kms:GenerateDataKey*', 'kms:DescribeKey'],
)
)

glue_sec_conf_enc_key.grant_encrypt_decrypt(iam.ServicePrincipal('logs.amazonaws.com'))
glue_sec_conf_enc_key.grant_encrypt_decrypt(iam.ServicePrincipal('glue.amazonaws.com'))
glue_sec_conf_enc_key.grant_encrypt_decrypt(iam.ServicePrincipal('s3.amazonaws.com'))

glue_crawler_security_config = glue.CfnSecurityConfiguration(
self,
f'crwlr_sec_config-{dataset.GlueCrawlerName}',
encryption_configuration=glue.CfnSecurityConfiguration.EncryptionConfigurationProperty(
cloud_watch_encryption=glue.CfnSecurityConfiguration.CloudWatchEncryptionProperty(
cloud_watch_encryption_mode='SSE-KMS', kms_key_arn=glue_sec_conf_enc_key.key_arn
),
job_bookmarks_encryption=glue.CfnSecurityConfiguration.JobBookmarksEncryptionProperty(
job_bookmarks_encryption_mode='CSE-KMS', kms_key_arn=glue_sec_conf_enc_key.key_arn
),
s3_encryptions=glue.CfnSecurityConfiguration.S3EncryptionProperty(s3_encryption_mode='SSE-S3'),
),
name=f'crwlr_sec_config-{dataset.GlueCrawlerName}',
)

# Support resources: GlueCrawler for the dataset, Profiling Job and Trigger
crawler = glue.CfnCrawler(
self,
dataset.GlueCrawlerName,
description=f'datall Glue Crawler for S3 Bucket {dataset.S3BucketName}',
name=dataset.GlueCrawlerName,
database_name=dataset.GlueDatabaseName,
crawler_security_configuration=glue_crawler_security_config.name,
schedule={'scheduleExpression': f'{dataset.GlueCrawlerSchedule}'} if dataset.GlueCrawlerSchedule else None,
role=dataset_admin_role.role_arn,
targets=CfnCrawler.TargetsProperty(
s3_targets=[CfnCrawler.S3TargetProperty(path=f's3://{dataset.S3BucketName}')]
),
)
crawler.node.add_dependency(dataset_bucket)
crawler.node.add_dependency(dataset_admin_policy)
crawler.node.add_dependency(dataset_admin_role)
crawler.node.add_dependency(glue_crawler_security_config)

job_args = {
'--additional-python-modules': 'urllib3<2,pydeequ',
Expand Down Expand Up @@ -480,6 +531,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
role=dataset_admin_role.role_arn,
allocated_capacity=10,
execution_property=glue.CfnJob.ExecutionPropertyProperty(max_concurrent_runs=100),
security_configuration=glue_crawler_security_config.name,
command=glue.CfnJob.JobCommandProperty(
name='glueetl',
python_version='3',
Expand Down

0 comments on commit 796b531

Please sign in to comment.