forked from data-dot-all/dataall
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Redshift datasets module (data-dot-all#1424)
### Feature or Bugfix - Feature ### Detail The design is up-to-date in the issue data-dot-all#955 as well as next steps ### Relates - data-dot-all#955 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
- Loading branch information
Showing
103 changed files
with
8,025 additions
and
170 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
"""Contains the code related to datasets""" | ||
|
||
import logging | ||
from typing import List, Type, Set | ||
|
||
from dataall.base.loader import ModuleInterface, ImportMode | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
class RedshiftDatasetApiModuleInterface(ModuleInterface): | ||
"""Implements ModuleInterface for dataset GraphQl lambda""" | ||
|
||
@staticmethod | ||
def is_supported(modes): | ||
return ImportMode.API in modes | ||
|
||
@staticmethod | ||
def depends_on() -> List[Type['ModuleInterface']]: | ||
from dataall.modules.datasets_base import DatasetBaseApiModuleInterface | ||
from dataall.modules.catalog import CatalogApiModuleInterface | ||
from dataall.modules.feed import FeedApiModuleInterface | ||
from dataall.modules.vote import VoteApiModuleInterface | ||
|
||
return [ | ||
DatasetBaseApiModuleInterface, | ||
CatalogApiModuleInterface, | ||
FeedApiModuleInterface, | ||
VoteApiModuleInterface, | ||
] | ||
|
||
def __init__(self): | ||
from dataall.modules.vote.services.vote_service import add_vote_type | ||
from dataall.modules.feed.api.registry import FeedRegistry, FeedDefinition | ||
from dataall.modules.catalog.indexers.registry import GlossaryRegistry, GlossaryDefinition | ||
from dataall.core.environment.services.environment_resource_manager import EnvironmentResourceManager | ||
|
||
from dataall.modules.redshift_datasets.indexers.dataset_indexer import DatasetIndexer | ||
from dataall.modules.redshift_datasets.indexers.table_indexer import DatasetTableIndexer | ||
from dataall.modules.redshift_datasets.db.redshift_dataset_repositories import ( | ||
RedshiftDatasetEnvironmentResource, | ||
) | ||
from dataall.modules.redshift_datasets.db.redshift_connection_repositories import ( | ||
RedshiftConnectionEnvironmentResource, | ||
) | ||
from dataall.modules.redshift_datasets.db.redshift_models import RedshiftDataset, RedshiftTable | ||
from dataall.modules.redshift_datasets.services.redshift_constants import ( | ||
GLOSSARY_REDSHIFT_DATASET_NAME, | ||
GLOSSARY_REDSHIFT_DATASET_TABLE_NAME, | ||
FEED_REDSHIFT_DATASET_NAME, | ||
FEED_REDSHIFT_DATASET_TABLE_NAME, | ||
VOTE_REDSHIFT_DATASET_NAME, | ||
) | ||
|
||
import dataall.modules.redshift_datasets.api | ||
|
||
FeedRegistry.register(FeedDefinition(FEED_REDSHIFT_DATASET_TABLE_NAME, RedshiftTable)) | ||
FeedRegistry.register(FeedDefinition(FEED_REDSHIFT_DATASET_NAME, RedshiftDataset)) | ||
|
||
GlossaryRegistry.register( | ||
GlossaryDefinition( | ||
target_type=GLOSSARY_REDSHIFT_DATASET_NAME, | ||
object_type='RedshiftDataset', | ||
model=RedshiftDataset, | ||
reindexer=DatasetIndexer, | ||
) | ||
) | ||
|
||
GlossaryRegistry.register( | ||
GlossaryDefinition( | ||
target_type=GLOSSARY_REDSHIFT_DATASET_TABLE_NAME, | ||
object_type='RedshiftDatasetTable', | ||
model=RedshiftTable, | ||
reindexer=DatasetTableIndexer, | ||
) | ||
) | ||
|
||
add_vote_type(VOTE_REDSHIFT_DATASET_NAME, DatasetIndexer) | ||
|
||
EnvironmentResourceManager.register(RedshiftDatasetEnvironmentResource()) | ||
EnvironmentResourceManager.register(RedshiftConnectionEnvironmentResource()) | ||
|
||
log.info('API of Redshift datasets has been imported') | ||
|
||
|
||
class RedshiftDatasetCdkModuleInterface(ModuleInterface): | ||
"""Loads dataset cdk stacks""" | ||
|
||
@staticmethod | ||
def is_supported(modes: Set[ImportMode]): | ||
return ImportMode.CDK in modes | ||
|
||
def __init__(self): | ||
import dataall.modules.redshift_datasets.cdk | ||
|
||
log.info('Redshift Dataset CDK has been imported') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
"""The GraphQL schema of datasets and related functionality""" | ||
|
||
from dataall.modules.redshift_datasets.api import connections, datasets | ||
|
||
__all__ = ['connections', 'datasets'] |
9 changes: 9 additions & 0 deletions
9
backend/dataall/modules/redshift_datasets/api/connections/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from . import ( | ||
input_types, | ||
queries, | ||
mutations, | ||
resolvers, | ||
types, | ||
) | ||
|
||
__all__ = ['resolvers', 'types', 'input_types', 'queries', 'mutations'] |
6 changes: 6 additions & 0 deletions
6
backend/dataall/modules/redshift_datasets/api/connections/enums.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from dataall.base.api.constants import GraphQLEnumMapper | ||
|
||
|
||
class RedshiftType(GraphQLEnumMapper): | ||
Serverless = 'serverless' | ||
Cluster = 'cluster' |
30 changes: 30 additions & 0 deletions
30
backend/dataall/modules/redshift_datasets/api/connections/input_types.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from dataall.base.api import gql | ||
|
||
|
||
CreateRedshiftConnectionInput = gql.InputType( | ||
name='CreateRedshiftConnectionInput', | ||
arguments=[ | ||
gql.Argument('connectionName', gql.NonNullableType(gql.String)), | ||
gql.Argument('environmentUri', gql.NonNullableType(gql.String)), | ||
gql.Argument('SamlGroupName', gql.NonNullableType(gql.String)), | ||
gql.Argument('redshiftType', gql.NonNullableType(gql.String)), | ||
gql.Argument('clusterId', gql.String), | ||
gql.Argument('nameSpaceId', gql.String), | ||
gql.Argument('workgroup', gql.String), | ||
gql.Argument('database', gql.NonNullableType(gql.String)), | ||
gql.Argument('redshiftUser', gql.String), | ||
gql.Argument('secretArn', gql.String), | ||
], | ||
) | ||
|
||
|
||
ConnectionFilter = gql.InputType( | ||
name='ConnectionFilter', | ||
arguments=[ | ||
gql.Argument('term', gql.String), | ||
gql.Argument('page', gql.Integer), | ||
gql.Argument('pageSize', gql.Integer), | ||
gql.Argument('environmentUri', gql.String), | ||
gql.Argument('groupUri', gql.String), | ||
], | ||
) |
22 changes: 22 additions & 0 deletions
22
backend/dataall/modules/redshift_datasets/api/connections/mutations.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from dataall.base.api import gql | ||
from dataall.modules.redshift_datasets.api.connections.resolvers import ( | ||
create_redshift_connection, | ||
delete_redshift_connection, | ||
) | ||
from dataall.modules.redshift_datasets.api.connections.types import ( | ||
RedshiftConnection, | ||
) | ||
|
||
createRedshiftConnection = gql.MutationField( | ||
name='createRedshiftConnection', | ||
args=[gql.Argument('input', gql.Ref('CreateRedshiftConnectionInput'))], | ||
type=RedshiftConnection, | ||
resolver=create_redshift_connection, | ||
) | ||
|
||
deleteRedshiftConnection = gql.MutationField( | ||
name='deleteRedshiftConnection', | ||
args=[gql.Argument('connectionUri', gql.NonNullableType(gql.String))], | ||
type=gql.Boolean, | ||
resolver=delete_redshift_connection, | ||
) |
30 changes: 30 additions & 0 deletions
30
backend/dataall/modules/redshift_datasets/api/connections/queries.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from dataall.base.api import gql | ||
from dataall.modules.redshift_datasets.api.connections.resolvers import ( | ||
list_environment_redshift_connections, | ||
list_redshift_connection_schemas, | ||
list_redshift_schema_tables, | ||
) | ||
|
||
listEnvironmentRedshiftConnections = gql.QueryField( | ||
name='listEnvironmentRedshiftConnections', | ||
args=[gql.Argument('filter', gql.Ref('ConnectionFilter'))], | ||
type=gql.Ref('RedshiftConnectionSearchResult'), | ||
resolver=list_environment_redshift_connections, | ||
) | ||
|
||
listRedshiftConnectionSchemas = gql.QueryField( | ||
name='listRedshiftConnectionSchemas', | ||
args=[gql.Argument('connectionUri', gql.NonNullableType(gql.String))], | ||
type=gql.ArrayType(gql.String), | ||
resolver=list_redshift_connection_schemas, | ||
) | ||
|
||
listRedshiftSchemaTables = gql.QueryField( | ||
name='listRedshiftSchemaTables', | ||
args=[ | ||
gql.Argument('connectionUri', gql.NonNullableType(gql.String)), | ||
gql.Argument('schema', gql.NonNullableType(gql.String)), | ||
], | ||
type=gql.ArrayType(gql.Ref('RedshiftTable')), | ||
resolver=list_redshift_schema_tables, | ||
) |
61 changes: 61 additions & 0 deletions
61
backend/dataall/modules/redshift_datasets/api/connections/resolvers.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import logging | ||
from typing import Any | ||
from dataall.base.db import exceptions | ||
from dataall.base.api.context import Context | ||
|
||
from dataall.modules.redshift_datasets.api.connections.enums import RedshiftType | ||
from dataall.modules.redshift_datasets.services.redshift_connection_service import RedshiftConnectionService | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
def create_redshift_connection(context: Context, source, input=None): | ||
RequestValidator.validate_connection_creation_request(data=input) | ||
return RedshiftConnectionService.create_redshift_connection( | ||
uri=input['environmentUri'], admin_group=input['SamlGroupName'], data=input | ||
) | ||
|
||
|
||
def delete_redshift_connection(context: Context, source, connectionUri: str): | ||
RequestValidator.required_param('connectionUri', connectionUri) | ||
return RedshiftConnectionService.delete_redshift_connection(uri=connectionUri) | ||
|
||
|
||
def list_environment_redshift_connections(context: Context, source, filter: dict = None): | ||
environmentUri = filter['environmentUri'] | ||
RequestValidator.required_param('environmentUri', environmentUri) | ||
return RedshiftConnectionService.list_environment_redshift_connections(uri=environmentUri, filter=filter) | ||
|
||
|
||
def list_redshift_connection_schemas(context: Context, source, connectionUri): | ||
RequestValidator.required_param('connectionUri', connectionUri) | ||
return RedshiftConnectionService.list_connection_schemas(uri=connectionUri) | ||
|
||
|
||
def list_redshift_schema_tables(context: Context, source, connectionUri: str, schema: str): | ||
RequestValidator.required_param('connectionUri', connectionUri) | ||
RequestValidator.required_param('schema', schema) | ||
return RedshiftConnectionService.list_schema_tables(uri=connectionUri, schema=schema) | ||
|
||
|
||
class RequestValidator: | ||
def required_param(param_name: str, param_value: Any): | ||
if not param_value: | ||
raise exceptions.RequiredParameter(param_name) | ||
|
||
def validate_connection_creation_request(data): | ||
if not data: | ||
raise exceptions.RequiredParameter('data') | ||
|
||
RequestValidator.required_param('SamlGroupName', data.get('SamlGroupName')) | ||
RequestValidator.required_param('environmentUri', data.get('environmentUri')) | ||
RequestValidator.required_param('connectionName', data.get('connectionName')) | ||
RequestValidator.required_param('redshiftType', data.get('redshiftType')) | ||
RequestValidator.required_param('database', data.get('database')) | ||
if not data.get('redshiftUser') and not data.get('secretArn'): | ||
raise exceptions.RequiredParameter('RedshiftUser OR secretArn') | ||
if data.get('redshiftType') == RedshiftType.Serverless.value: | ||
RequestValidator.required_param('nameSpaceId', data.get('nameSpaceId')) | ||
RequestValidator.required_param('workgroup', data.get('workgroup')) | ||
if data.get('redshiftType') == RedshiftType.Cluster.value: | ||
RequestValidator.required_param('clusterId', data.get('clusterId')) |
40 changes: 40 additions & 0 deletions
40
backend/dataall/modules/redshift_datasets/api/connections/types.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from dataall.base.api import gql | ||
|
||
RedshiftConnection = gql.ObjectType( | ||
name='RedshiftConnection', | ||
fields=[ | ||
gql.Field('connectionUri', gql.ID), | ||
gql.Field('name', gql.String), | ||
gql.Field('connectionType', gql.String), | ||
gql.Field('SamlGroupName', gql.String), | ||
gql.Field('label', gql.String), | ||
gql.Field('redshiftType', gql.String), | ||
gql.Field('clusterId', gql.String), | ||
gql.Field('nameSpaceId', gql.String), | ||
gql.Field('workgroup', gql.String), | ||
gql.Field('database', gql.String), | ||
gql.Field('redshiftUser', gql.String), | ||
gql.Field('secretArn', gql.String), | ||
], | ||
) | ||
|
||
RedshiftConnectionSearchResult = gql.ObjectType( | ||
name='RedshiftConnectionSearchResult', | ||
fields=[ | ||
gql.Field('count', gql.Integer), | ||
gql.Field('page', gql.Integer), | ||
gql.Field('pages', gql.Integer), | ||
gql.Field('hasNext', gql.Boolean), | ||
gql.Field('hasPrevious', gql.Boolean), | ||
gql.Field('nodes', gql.ArrayType(RedshiftConnection)), | ||
], | ||
) | ||
|
||
RedshiftTable = gql.ObjectType( | ||
name='RedshiftTable', | ||
fields=[ | ||
gql.Field('name', gql.String), | ||
gql.Field('type', gql.String), | ||
gql.Field('alreadyAdded', gql.String), | ||
], | ||
) |
9 changes: 9 additions & 0 deletions
9
backend/dataall/modules/redshift_datasets/api/datasets/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from . import ( | ||
input_types, | ||
queries, | ||
mutations, | ||
resolvers, | ||
types, | ||
) | ||
|
||
__all__ = ['resolvers', 'types', 'input_types', 'queries', 'mutations'] |
1 change: 1 addition & 0 deletions
1
backend/dataall/modules/redshift_datasets/api/datasets/enums.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""Contains the enums GraphQL mapping for enums""" |
Oops, something went wrong.