Skip to content

Commit

Permalink
Separating Out Access Logging (#1695)
Browse files Browse the repository at this point in the history
### Feature or Bugfix
<!-- please choose -->
- Refactoring

### Detail
- Move access logging to a separate environment logging bucket (rather
than env default bucket used for athena queries and profiling job
artifacts)

### Relates
- <URL or Ticket>

### Security
Please answer the questions below briefly where applicable, or write
`N/A`. Based on
[OWASP 10](https://owasp.org/Top10/en/).

- Does this PR introduce or modify any input fields or queries - this
includes
fetching data from storage outside the application (e.g. a database, an
S3 bucket)?
  - Is the input sanitized?
- What precautions are you taking before deserializing the data you
consume?
  - Is injection prevented by parametrizing queries?
  - Have you ensured no `eval` or similar functions are used?
- Does this PR introduce any functionality or component that requires
authorization?
- How have you ensured it respects the existing AuthN/AuthZ mechanisms?
  - Are you logging failed auth attempts?
- Are you using or adding any cryptographic features?
  - Do you use a standard proven implementations?
  - Are the used keys controlled by the customer? Where are they stored?
- Are you introducing any new policies/roles/users?
  - Have you used the least-privilege principle? How?


By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.
  • Loading branch information
noah-paige authored Nov 14, 2024
1 parent d9f90df commit b30a354
Show file tree
Hide file tree
Showing 15 changed files with 172 additions and 68 deletions.
52 changes: 6 additions & 46 deletions .checkov.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@
]
},
{
"file": "/cdk.out/asset.3045cb6b4340be1e173df6dcf6248d565aa849ceda3e2cf2c2f221ccee4bc1d6/pivotRole.yaml",
"file": "/cdk.out/asset.05d71d8b69cd4483d3c9db9120b556b718c72f349debbb79d461c74c4964b350/pivotRole.yaml",
"findings": [
{
"resource": "AWS::IAM::ManagedPolicy.PivotRolePolicy0",
Expand Down Expand Up @@ -490,12 +490,6 @@
{
"file": "/checkov_environment_synth.json",
"findings": [
{
"resource": "AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicy19AC37181",
"check_ids": [
"CKV_AWS_111"
]
},
{
"resource": "AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicy2E85AF510",
"check_ids": [
Expand All @@ -508,24 +502,6 @@
"CKV_AWS_111"
]
},
{
"resource": "AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicy5A19E75CA",
"check_ids": [
"CKV_AWS_109"
]
},
{
"resource": "AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicyCC720210",
"check_ids": [
"CKV_AWS_109"
]
},
{
"resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy1A0C96958",
"check_ids": [
"CKV_AWS_111"
]
},
{
"resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy2B12D381A",
"check_ids": [
Expand All @@ -538,18 +514,6 @@
"CKV_AWS_111"
]
},
{
"resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy3E3CBA9E",
"check_ids": [
"CKV_AWS_109"
]
},
{
"resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy56D7DC525",
"check_ids": [
"CKV_AWS_109"
]
},
{
"resource": "AWS::Lambda::Function.CustomCDKBucketDeployment8693BB64968944B69AAFB0CC9EB8756C81C01536",
"check_ids": [
Expand All @@ -563,38 +527,34 @@
"resource": "AWS::Lambda::Function.GlueDatabaseLFCustomResourceHandler7FAF0F82",
"check_ids": [
"CKV_AWS_115",
"CKV_AWS_117",
"CKV_AWS_173"
"CKV_AWS_117"
]
},
{
"resource": "AWS::Lambda::Function.LakeformationDefaultSettingsHandler2CBEDB06",
"check_ids": [
"CKV_AWS_115",
"CKV_AWS_117",
"CKV_AWS_173"
"CKV_AWS_117"
]
},
{
"resource": "AWS::Lambda::Function.dataallGlueDbCustomResourceProviderframeworkonEventF8347BA7",
"check_ids": [
"CKV_AWS_115",
"CKV_AWS_116",
"CKV_AWS_117",
"CKV_AWS_173"
"CKV_AWS_117"
]
},
{
"resource": "AWS::Lambda::Function.dataallLakeformationDefaultSettingsProviderframeworkonEventBB660E32",
"check_ids": [
"CKV_AWS_115",
"CKV_AWS_116",
"CKV_AWS_117",
"CKV_AWS_173"
"CKV_AWS_117"
]
},
{
"resource": "AWS::S3::Bucket.EnvironmentDefaultBucket78C3A8B0",
"resource": "AWS::S3::Bucket.EnvironmentDefaultLogBucket7F0EFAB3",
"check_ids": [
"CKV_AWS_18"
]
Expand Down
1 change: 1 addition & 0 deletions backend/dataall/core/environment/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
gql.Field('subscriptionsConsumersTopicName', type=gql.String),
gql.Field('subscriptionsProducersTopicName', type=gql.String),
gql.Field('EnvironmentDefaultBucketName', type=gql.String),
gql.Field('EnvironmentLogsBucketName', type=gql.String),
gql.Field('EnvironmentDefaultAthenaWorkGroup', type=gql.String),
gql.Field(
name='networks',
Expand Down
32 changes: 23 additions & 9 deletions backend/dataall/core/environment/cdk/environment_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,30 +165,44 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
f'arn:aws:iam::{self._environment.AwsAccountId}:role/{self.pivot_role_name}',
)

# Environment S3 Bucket
default_environment_bucket = s3.Bucket(
# Environment Logging S3 Bucket
default_environment_log_bucket = s3.Bucket(
self,
'EnvironmentDefaultBucket',
bucket_name=self._environment.EnvironmentDefaultBucketName,
'EnvironmentDefaultLogBucket',
bucket_name=self._environment.EnvironmentLogsBucketName,
encryption=s3.BucketEncryption.S3_MANAGED,
removal_policy=RemovalPolicy.RETAIN,
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
versioned=True,
enforce_ssl=True,
)
default_environment_bucket.policy.apply_removal_policy(RemovalPolicy.RETAIN)
self.default_environment_bucket = default_environment_bucket

default_environment_bucket.add_to_resource_policy(
default_environment_log_bucket.policy.apply_removal_policy(RemovalPolicy.RETAIN)
default_environment_log_bucket.add_to_resource_policy(
iam.PolicyStatement(
sid='AWSLogDeliveryWrite',
effect=iam.Effect.ALLOW,
principals=[iam.ServicePrincipal('logging.s3.amazonaws.com')],
actions=['s3:PutObject', 's3:PutObjectAcl'],
resources=[f'{default_environment_bucket.bucket_arn}/*'],
resources=[f'{default_environment_log_bucket.bucket_arn}/*'],
)
)

# Environment S3 Bucket
default_environment_bucket = s3.Bucket(
self,
'EnvironmentDefaultBucket',
bucket_name=self._environment.EnvironmentDefaultBucketName,
encryption=s3.BucketEncryption.S3_MANAGED,
removal_policy=RemovalPolicy.RETAIN,
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
versioned=True,
enforce_ssl=True,
server_access_logs_bucket=default_environment_log_bucket,
server_access_logs_prefix=f'access_logs/{self._environment.EnvironmentDefaultBucketName}/',
)
default_environment_bucket.policy.apply_removal_policy(RemovalPolicy.RETAIN)
self.default_environment_bucket = default_environment_bucket

default_environment_bucket.add_lifecycle_rule(
abort_incomplete_multipart_upload_after=Duration.days(7),
noncurrent_version_transitions=[
Expand Down
1 change: 1 addition & 0 deletions backend/dataall/core/environment/db/environment_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class Environment(Resource, Base):
EnvironmentDefaultIAMRoleImported = Column(Boolean, default=False)
EnvironmentDefaultIAMRoleArn = Column(String, nullable=False)
EnvironmentDefaultBucketName = Column(String)
EnvironmentLogsBucketName = Column(String)
EnvironmentDefaultAthenaWorkGroup = Column(String)
roleCreated = Column(Boolean, nullable=False, default=False)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,13 @@ def create_environment(uri, data=None):
resource_prefix=env.resourcePrefix,
).build_compliant_name()

env.EnvironmentLogsBucketName = NamingConventionService(
target_uri=env.environmentUri,
target_label='env-access-logs',
pattern=NamingConventionPattern.S3,
resource_prefix=env.resourcePrefix,
).build_compliant_name()

env.EnvironmentDefaultAthenaWorkGroup = NamingConventionService(
target_uri=env.environmentUri,
target_label=env.label,
Expand Down
2 changes: 1 addition & 1 deletion backend/dataall/modules/s3_datasets/cdk/dataset_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
server_access_logs_bucket=s3.Bucket.from_bucket_name(
self,
'EnvAccessLogsBucket',
f'{env.EnvironmentDefaultBucketName}',
f'{env.EnvironmentLogsBucketName}',
),
server_access_logs_prefix=f'access_logs/{dataset.S3BucketName}/',
enforce_ssl=True,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@
from sqlalchemy.dialects import postgresql
from sqlalchemy.ext.declarative import declarative_base

from dataall.core.environment.db.environment_models import Environment
from dataall.core.environment.services.environment_service import EnvironmentService
from dataall.base.db import utils
from dataall.base.db import utils, Resource
from datetime import datetime

from dataall.core.permissions.services.permission_service import PermissionService
Expand All @@ -33,6 +32,11 @@
Base = declarative_base()


class Environment(Resource, Base):
__tablename__ = 'environment'
environmentUri = Column(String, primary_key=True, default=utils.uuid('environment'))


class EnvironmentGroup(Base):
__tablename__ = 'environment_group_permission'
groupUri = Column(String, primary_key=True)
Expand Down Expand Up @@ -123,7 +127,7 @@ def upgrade():
bind = op.get_bind()
session = orm.Session(bind=bind)
print('Back-filling consumer role permissions for environments...')
envs = EnvironmentService.list_all_active_environments(session=session)
envs = session.query(Environment).filter(Environment.deleted.is_(None)).all()
for env in envs:
groups = EnvironmentService.get_all_environment_groups(session=session, uri=env.environmentUri, filter=None)
for group in groups:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,29 @@
"""

from alembic import op
from sqlalchemy import orm
from dataall.core.environment.db.environment_models import EnvironmentGroup, Environment
from sqlalchemy import orm, Column, String
from sqlalchemy.ext.declarative import declarative_base
from dataall.core.environment.db.environment_models import EnvironmentGroup
from dataall.core.organizations.db.organization_repositories import OrganizationRepository
from dataall.core.permissions.services.organization_permissions import GET_ORGANIZATION
from dataall.core.organizations.db import organization_models as models
from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService
from dataall.base.db import utils, Resource

# revision identifiers, used by Alembic.
revision = '328e35e39e1e'
down_revision = '448d9dc95e94'
branch_labels = None
depends_on = None

Base = declarative_base()


class Environment(Resource, Base):
__tablename__ = 'environment'
environmentUri = Column(String, primary_key=True, default=utils.uuid('environment'))
organizationUri = Column(String, nullable=False)


def get_session():
bind = op.get_bind()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
"""

from alembic import op
from sqlalchemy import orm
from sqlalchemy import orm, Column, String
from sqlalchemy.ext.declarative import declarative_base

from dataall.core.environment.db.environment_models import Environment
from dataall.base.db import utils, Resource
from dataall.core.organizations.db.organization_models import Organization
from dataall.core.permissions.api.enums import PermissionType
from dataall.core.permissions.services.permission_service import PermissionService
Expand All @@ -28,6 +29,15 @@
branch_labels = None
depends_on = None

Base = declarative_base()


class Environment(Resource, Base):
__tablename__ = 'environment'
organizationUri = Column(String, nullable=False)
environmentUri = Column(String, primary_key=True, default=utils.uuid('environment'))
SamlGroupName = Column(String, nullable=False)


def get_session():
bind = op.get_bind()
Expand Down
55 changes: 55 additions & 0 deletions backend/migrations/versions/49c6b18ed814_add_env_logs_bucket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""add_env_logs_bucket
Revision ID: 49c6b18ed814
Revises: b21f86882012
Create Date: 2024-11-13 19:16:18.030415
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy import orm, Column, String
from sqlalchemy.ext.declarative import declarative_base

from dataall.base.db import Resource, utils
from dataall.base.utils.naming_convention import (
NamingConventionService,
NamingConventionPattern,
)

# revision identifiers, used by Alembic.
revision = '49c6b18ed814'
down_revision = 'b21f86882012'
branch_labels = None
depends_on = None

Base = declarative_base()


class Environment(Resource, Base):
__tablename__ = 'environment'
environmentUri = Column(String, primary_key=True, default=utils.uuid('environment'))
resourcePrefix = Column(String, nullable=False, default='dataall')
EnvironmentLogsBucketName = Column(String, nullable=True)


def upgrade():
op.add_column('environment', sa.Column('EnvironmentLogsBucketName', sa.String(), nullable=True))
bind = op.get_bind()
session = orm.Session(bind=bind)
environments = session.query(Environment).all()
for env in environments:
env.EnvironmentLogsBucketName = NamingConventionService(
target_uri=env.environmentUri,
target_label='env-access-logs',
pattern=NamingConventionPattern.S3,
resource_prefix=env.resourcePrefix,
).build_compliant_name()
session.commit()
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('environment', 'EnvironmentLogsBucketName')
# ### end Alembic commands ###
15 changes: 12 additions & 3 deletions backend/migrations/versions/852cdf6cf1e0_add_redshift_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@

from alembic import op
import sqlalchemy as sa
from sqlalchemy import orm
from sqlalchemy import orm, Column, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.dialects import postgresql

from dataall.core.environment.db.environment_models import Environment
from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService
from dataall.core.permissions.services.permission_service import PermissionService
from dataall.core.permissions.api.enums import PermissionType

from dataall.base.db import utils, Resource

# revision identifiers, used by Alembic.
revision = '852cdf6cf1e0'
Expand All @@ -34,6 +34,15 @@
ENVIRONMENT_REDSHIFT_ALL_WITH_DESC[CREATE_REDSHIFT_CONNECTION] = 'Create Redshift Connection in this environment'
ENVIRONMENT_REDSHIFT_ALL_WITH_DESC[IMPORT_REDSHIFT_DATASET] = 'Import Redshift Datasets to this environment'

Base = declarative_base()


class Environment(Resource, Base):
__tablename__ = 'environment'
organizationUri = Column(String, nullable=False)
environmentUri = Column(String, primary_key=True, default=utils.uuid('environment'))
SamlGroupName = Column(String, nullable=False)


def upgrade():
op.create_table(
Expand Down
Loading

0 comments on commit b30a354

Please sign in to comment.