Skip to content

Commit

Permalink
Merge branch 'data-dot-all:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
mourya-33 authored Aug 2, 2024
2 parents 796b531 + 59d0650 commit 6cd7ceb
Show file tree
Hide file tree
Showing 115 changed files with 8,556 additions and 2,631 deletions.
1 change: 1 addition & 0 deletions backend/dataall/base/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from dataall.base.api import gql
from dataall.base.api.constants import GraphQLEnumMapper
from dataall.base.api.queries import enumsQuery


def bootstrap():
Expand Down
11 changes: 11 additions & 0 deletions backend/dataall/base/api/queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from dataall.base.api import gql
from dataall.base.api.resolvers import enum_resolver
from dataall.base.api.types import EnumResult

# Argument: list of GraphQLEnumMapper subclass names the client wants resolved.
_enums_names_arg = gql.Argument(name='enums_names', type=gql.ArrayType(gql.String))

# Query returning the members (name/value pairs) of each requested enum class.
enumsQuery = gql.QueryField(
    name='queryEnums',
    args=[_enums_names_arg],
    type=gql.ArrayType(EnumResult),
    resolver=enum_resolver,
    test_scope='Enums',
)
14 changes: 14 additions & 0 deletions backend/dataall/base/api/resolvers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from dataall.base.api.constants import GraphQLEnumMapper


def enum_resolver(context, source, enums_names):
    """Resolve the `queryEnums` GraphQL query.

    :param context: GraphQL request context (unused).
    :param source: parent GraphQL object (unused).
    :param enums_names: iterable of GraphQLEnumMapper subclass names to look up;
        None or an empty iterable yields an empty result (previously None crashed
        with TypeError on the `in` test).
    :return: list of dicts, one per matched enum class, each with the class
        name and its members serialized as {'name': ..., 'value': str(...)}.
    """
    requested = set(enums_names or [])  # set membership; tolerate None from the API layer
    return [
        {
            'name': enum_class.__name__,
            'items': [{'name': item.name, 'value': str(item.value)} for item in enum_class],
        }
        for enum_class in GraphQLEnumMapper.__subclasses__()
        if enum_class.__name__ in requested
    ]
17 changes: 17 additions & 0 deletions backend/dataall/base/api/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from dataall.base.api import gql

# GraphQL type describing one enum member; both fields are serialized as strings.
EnumItem = gql.ObjectType(
    name='EnumItem',
    fields=[gql.Field(name=_field, type=gql.String) for _field in ('name', 'value')],
)

# GraphQL type pairing an enum class name with the list of its members.
_enum_result_fields = [
    gql.Field(name='name', type=gql.String),
    gql.Field(name='items', type=gql.ArrayType(EnumItem)),
]
EnumResult = gql.ObjectType(name='EnumResult', fields=_enum_result_fields)
6 changes: 3 additions & 3 deletions backend/dataall/base/cdkproxy/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
aws-cdk-lib==2.99.0
boto3==1.28.23
boto3-stubs==1.28.23
botocore==1.31.23
boto3==1.34.119
boto3-stubs==1.34.119
botocore==1.34.119
cdk-nag==2.7.2
constructs==10.0.73
starlette==0.36.3
Expand Down
11 changes: 11 additions & 0 deletions backend/dataall/base/db/paginator.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,14 @@ def paginate(query, page, page_size):
# nosemgrep: python.sqlalchemy.performance.performance-improvements.len-all-count
total = len(query.order_by(None).all())
return Page(items, page, page_size, total)


def paginate_list(items, page, page_size):
    """Return one Page of an in-memory list (mirrors paginate() for queries).

    :param items: full list to slice.
    :param page: 1-based page number.
    :param page_size: number of items per page.
    :raises AttributeError: if page or page_size is not >= 1.
    """
    if page <= 0:
        raise AttributeError('page needs to be >= 1')
    if page_size <= 0:
        raise AttributeError('page_size needs to be >= 1')
    offset = (page - 1) * page_size
    window = items[offset : offset + page_size]
    return Page(window, page, page_size, len(items))
2 changes: 1 addition & 1 deletion backend/dataall/core/environment/cdk/pivot_role_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def generate_policies(self) -> List[iam.ManagedPolicy]:
)

for service in services:
statements.extend(service.get_statements(self))
logger.info(f'Adding {service.__name__} statements to policy')
statements.extend(service.get_statements(self))
logger.info(f'statements: {str(service.get_statements(self))}')

statements_chunks = split_policy_statements_in_chunks(statements)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def has_group_resource_permission(
else:
return policy

@staticmethod
def query_all_resource_policies(
session, group_uri: str, resource_uri: str, resource_type: str = None, permissions: List[str] = None
):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

class DatasetTypes(GraphQLEnumMapper):
S3 = 'S3'
Redshift = 'Redshift'


class DatasetRole(GraphQLEnumMapper):
Expand Down
96 changes: 96 additions & 0 deletions backend/dataall/modules/redshift_datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""Contains the code related to datasets"""

import logging
from typing import List, Type, Set

from dataall.base.loader import ModuleInterface, ImportMode

log = logging.getLogger(__name__)


class RedshiftDatasetApiModuleInterface(ModuleInterface):
    """Implements ModuleInterface for the Redshift dataset GraphQL lambda."""

    @staticmethod
    def is_supported(modes):
        # Active only when the GraphQL API import mode is requested.
        return ImportMode.API in modes

    @staticmethod
    def depends_on() -> List[Type['ModuleInterface']]:
        # Local imports so the dependency modules are only loaded when the
        # module loader actually resolves the dependency graph.
        from dataall.modules.datasets_base import DatasetBaseApiModuleInterface
        from dataall.modules.catalog import CatalogApiModuleInterface
        from dataall.modules.feed import FeedApiModuleInterface
        from dataall.modules.vote import VoteApiModuleInterface

        return [
            DatasetBaseApiModuleInterface,
            CatalogApiModuleInterface,
            FeedApiModuleInterface,
            VoteApiModuleInterface,
        ]

    def __init__(self):
        """Wire the Redshift dataset module into the shared registries.

        Registers feed and glossary definitions for Redshift datasets and
        tables, a vote type, and environment resources for datasets and
        connections, then imports the GraphQL API package for its
        registration side effects.
        """
        from dataall.modules.vote.services.vote_service import add_vote_type
        from dataall.modules.feed.api.registry import FeedRegistry, FeedDefinition
        from dataall.modules.catalog.indexers.registry import GlossaryRegistry, GlossaryDefinition
        from dataall.core.environment.services.environment_resource_manager import EnvironmentResourceManager

        from dataall.modules.redshift_datasets.indexers.dataset_indexer import DatasetIndexer
        from dataall.modules.redshift_datasets.indexers.table_indexer import DatasetTableIndexer
        from dataall.modules.redshift_datasets.db.redshift_dataset_repositories import (
            RedshiftDatasetEnvironmentResource,
        )
        from dataall.modules.redshift_datasets.db.redshift_connection_repositories import (
            RedshiftConnectionEnvironmentResource,
        )
        from dataall.modules.redshift_datasets.db.redshift_models import RedshiftDataset, RedshiftTable
        from dataall.modules.redshift_datasets.services.redshift_constants import (
            GLOSSARY_REDSHIFT_DATASET_NAME,
            GLOSSARY_REDSHIFT_DATASET_TABLE_NAME,
            FEED_REDSHIFT_DATASET_NAME,
            FEED_REDSHIFT_DATASET_TABLE_NAME,
            VOTE_REDSHIFT_DATASET_NAME,
        )

        # Imported for its side effect of registering the module's GraphQL schema.
        import dataall.modules.redshift_datasets.api

        # Feed (activity stream) definitions for tables and datasets.
        FeedRegistry.register(FeedDefinition(FEED_REDSHIFT_DATASET_TABLE_NAME, RedshiftTable))
        FeedRegistry.register(FeedDefinition(FEED_REDSHIFT_DATASET_NAME, RedshiftDataset))

        # Glossary definitions: each target type is reindexed by its indexer.
        GlossaryRegistry.register(
            GlossaryDefinition(
                target_type=GLOSSARY_REDSHIFT_DATASET_NAME,
                object_type='RedshiftDataset',
                model=RedshiftDataset,
                reindexer=DatasetIndexer,
            )
        )

        GlossaryRegistry.register(
            GlossaryDefinition(
                target_type=GLOSSARY_REDSHIFT_DATASET_TABLE_NAME,
                object_type='RedshiftDatasetTable',
                model=RedshiftTable,
                reindexer=DatasetTableIndexer,
            )
        )

        # Presumably links votes on Redshift datasets to DatasetIndexer for
        # catalog reindexing — confirm against vote_service.add_vote_type.
        add_vote_type(VOTE_REDSHIFT_DATASET_NAME, DatasetIndexer)

        # Register environment resources for Redshift datasets and connections.
        EnvironmentResourceManager.register(RedshiftDatasetEnvironmentResource())
        EnvironmentResourceManager.register(RedshiftConnectionEnvironmentResource())

        log.info('API of Redshift datasets has been imported')


class RedshiftDatasetCdkModuleInterface(ModuleInterface):
    """Loads the Redshift dataset CDK stacks."""

    @staticmethod
    def is_supported(modes: Set[ImportMode]):
        # Active only for CDK stack synthesis, not the GraphQL API.
        return ImportMode.CDK in modes

    def __init__(self):
        # Imported for its side effect of registering the module's CDK stacks.
        import dataall.modules.redshift_datasets.cdk

        log.info('Redshift Dataset CDK has been imported')
5 changes: 5 additions & 0 deletions backend/dataall/modules/redshift_datasets/api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""The GraphQL schema of datasets and related functionality"""

from dataall.modules.redshift_datasets.api import connections, datasets

__all__ = ['connections', 'datasets']
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from . import (
    input_types,
    queries,
    mutations,
    resolvers,
    types,
)

# Submodules re-exported as this package's public API surface.
__all__ = ['resolvers', 'types', 'input_types', 'queries', 'mutations']
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from dataall.base.api.constants import GraphQLEnumMapper


class RedshiftType(GraphQLEnumMapper):
    """Deployment flavor of the Redshift endpoint a connection points at."""

    Serverless = 'serverless'
    Cluster = 'cluster'
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from dataall.base.api import gql


# Payload for createRedshiftConnection. Serverless connections use
# nameSpaceId/workgroup, cluster connections use clusterId; authentication is
# either redshiftUser or secretArn (enforced by the resolver's validator).
CreateRedshiftConnectionInput = gql.InputType(
    name='CreateRedshiftConnectionInput',
    arguments=[
        gql.Argument(name='connectionName', type=gql.NonNullableType(gql.String)),
        gql.Argument(name='environmentUri', type=gql.NonNullableType(gql.String)),
        gql.Argument(name='SamlGroupName', type=gql.NonNullableType(gql.String)),
        gql.Argument(name='redshiftType', type=gql.NonNullableType(gql.String)),
        gql.Argument(name='clusterId', type=gql.String),
        gql.Argument(name='nameSpaceId', type=gql.String),
        gql.Argument(name='workgroup', type=gql.String),
        gql.Argument(name='database', type=gql.NonNullableType(gql.String)),
        gql.Argument(name='redshiftUser', type=gql.String),
        gql.Argument(name='secretArn', type=gql.String),
    ],
)


# Search/pagination filter for listing Redshift connections.
ConnectionFilter = gql.InputType(
    name='ConnectionFilter',
    arguments=[
        gql.Argument(name='term', type=gql.String),
        gql.Argument(name='page', type=gql.Integer),
        gql.Argument(name='pageSize', type=gql.Integer),
        gql.Argument(name='environmentUri', type=gql.String),
        gql.Argument(name='groupUri', type=gql.String),
    ],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from dataall.base.api import gql
from dataall.modules.redshift_datasets.api.connections.resolvers import (
create_redshift_connection,
delete_redshift_connection,
)
from dataall.modules.redshift_datasets.api.connections.types import (
RedshiftConnection,
)

# Mutation: create a Redshift connection from a CreateRedshiftConnectionInput payload.
createRedshiftConnection = gql.MutationField(
    name='createRedshiftConnection',
    args=[gql.Argument(name='input', type=gql.Ref('CreateRedshiftConnectionInput'))],
    type=RedshiftConnection,
    resolver=create_redshift_connection,
)

# Mutation: delete a Redshift connection by URI; returns a Boolean.
deleteRedshiftConnection = gql.MutationField(
    name='deleteRedshiftConnection',
    args=[gql.Argument(name='connectionUri', type=gql.NonNullableType(gql.String))],
    type=gql.Boolean,
    resolver=delete_redshift_connection,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from dataall.base.api import gql
from dataall.modules.redshift_datasets.api.connections.resolvers import (
list_environment_redshift_connections,
list_redshift_connection_schemas,
list_redshift_schema_tables,
)

# Paginated listing of the Redshift connections of an environment.
listEnvironmentRedshiftConnections = gql.QueryField(
    name='listEnvironmentRedshiftConnections',
    args=[gql.Argument(name='filter', type=gql.Ref('ConnectionFilter'))],
    type=gql.Ref('RedshiftConnectionSearchResult'),
    resolver=list_environment_redshift_connections,
)

# Names of the database schemas reachable through one connection.
listRedshiftConnectionSchemas = gql.QueryField(
    name='listRedshiftConnectionSchemas',
    args=[gql.Argument(name='connectionUri', type=gql.NonNullableType(gql.String))],
    type=gql.ArrayType(gql.String),
    resolver=list_redshift_connection_schemas,
)

# Tables contained in one schema of a connection's database.
listRedshiftSchemaTables = gql.QueryField(
    name='listRedshiftSchemaTables',
    args=[
        gql.Argument(name='connectionUri', type=gql.NonNullableType(gql.String)),
        gql.Argument(name='schema', type=gql.NonNullableType(gql.String)),
    ],
    type=gql.ArrayType(gql.Ref('RedshiftTable')),
    resolver=list_redshift_schema_tables,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import logging
from typing import Any
from dataall.base.db import exceptions
from dataall.base.api.context import Context

from dataall.modules.redshift_datasets.api.connections.enums import RedshiftType
from dataall.modules.redshift_datasets.services.redshift_connection_service import RedshiftConnectionService

log = logging.getLogger(__name__)


def create_redshift_connection(context: Context, source, input=None):
    """Validate the mutation payload and delegate creation to the service layer."""
    RequestValidator.validate_connection_creation_request(data=input)
    data = input
    return RedshiftConnectionService.create_redshift_connection(
        uri=data['environmentUri'],
        admin_group=data['SamlGroupName'],
        data=data,
    )


def delete_redshift_connection(context: Context, source, connectionUri: str):
    """Delete the Redshift connection identified by `connectionUri`."""
    RequestValidator.required_param('connectionUri', connectionUri)
    deleted = RedshiftConnectionService.delete_redshift_connection(uri=connectionUri)
    return deleted


def list_environment_redshift_connections(context: Context, source, filter: dict = None):
    """List the Redshift connections of an environment, paginated by `filter`.

    Bug fix: the previous implementation indexed `filter['environmentUri']`
    directly, so an omitted filter raised TypeError (None is not subscriptable)
    and a missing key raised KeyError, instead of the intended
    RequiredParameter validation error.

    :param filter: optional ConnectionFilter dict; must carry 'environmentUri'.
    :raises exceptions.RequiredParameter: when 'environmentUri' is missing.
    """
    if not filter:
        filter = {}
    environmentUri = filter.get('environmentUri')
    RequestValidator.required_param('environmentUri', environmentUri)
    return RedshiftConnectionService.list_environment_redshift_connections(uri=environmentUri, filter=filter)


def list_redshift_connection_schemas(context: Context, source, connectionUri):
    """List the database schemas reachable through one connection."""
    RequestValidator.required_param('connectionUri', connectionUri)
    schemas = RedshiftConnectionService.list_connection_schemas(uri=connectionUri)
    return schemas


def list_redshift_schema_tables(context: Context, source, connectionUri: str, schema: str):
    """List the tables of one schema accessible through a connection."""
    # Validate both required parameters in the same order as before.
    for param_name, param_value in (('connectionUri', connectionUri), ('schema', schema)):
        RequestValidator.required_param(param_name, param_value)
    return RedshiftConnectionService.list_schema_tables(uri=connectionUri, schema=schema)


class RequestValidator:
    """Static validation helpers for the Redshift connection resolvers.

    Fix: the methods are invoked as `RequestValidator.method(...)` without an
    instance, so they are decorated with @staticmethod — consistent with the
    same pattern applied elsewhere in this codebase (e.g.
    ResourcePolicyService.query_all_resource_policies in this commit).
    """

    @staticmethod
    def required_param(param_name: str, param_value: Any):
        """Raise RequiredParameter when a mandatory value is missing or falsy."""
        if not param_value:
            raise exceptions.RequiredParameter(param_name)

    @staticmethod
    def validate_connection_creation_request(data):
        """Validate the createRedshiftConnection input payload.

        Beyond the always-required fields, serverless connections also need
        nameSpaceId and workgroup, cluster connections need clusterId, and one
        of redshiftUser or secretArn must be provided for authentication.
        """
        if not data:
            raise exceptions.RequiredParameter('data')

        RequestValidator.required_param('SamlGroupName', data.get('SamlGroupName'))
        RequestValidator.required_param('environmentUri', data.get('environmentUri'))
        RequestValidator.required_param('connectionName', data.get('connectionName'))
        RequestValidator.required_param('redshiftType', data.get('redshiftType'))
        RequestValidator.required_param('database', data.get('database'))
        if not data.get('redshiftUser') and not data.get('secretArn'):
            raise exceptions.RequiredParameter('RedshiftUser OR secretArn')
        if data.get('redshiftType') == RedshiftType.Serverless.value:
            RequestValidator.required_param('nameSpaceId', data.get('nameSpaceId'))
            RequestValidator.required_param('workgroup', data.get('workgroup'))
        if data.get('redshiftType') == RedshiftType.Cluster.value:
            RequestValidator.required_param('clusterId', data.get('clusterId'))
Loading

0 comments on commit 6cd7ceb

Please sign in to comment.