Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Snowflake Streaming Loader on AWS #93

Merged
merged 1 commit into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions terraform/aws/pipeline/default/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
| <a name="module_s3_pipeline_bucket"></a> [s3\_pipeline\_bucket](#module\_s3\_pipeline\_bucket) | snowplow-devops/s3-bucket/aws | 0.2.0 |
| <a name="module_sf_loader"></a> [sf\_loader](#module\_sf\_loader) | snowplow-devops/snowflake-loader-ec2/aws | 0.3.0 |
| <a name="module_sf_transformer_wrj"></a> [sf\_transformer\_wrj](#module\_sf\_transformer\_wrj) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 |
| <a name="module_snowflake_streaming_loader_enriched"></a> [snowflake\_streaming\_loader\_enriched](#module\_snowflake\_streaming\_loader\_enriched) | snowplow-devops/snowflake-streaming-loader-ec2/aws | 0.1.0 |

## Resources

Expand Down Expand Up @@ -97,6 +98,12 @@
| <a name="input_snowflake_loader_user"></a> [snowflake\_loader\_user](#input\_snowflake\_loader\_user) | The Snowflake user used by Snowflake Loader | `string` | `""` | no |
| <a name="input_snowflake_region"></a> [snowflake\_region](#input\_snowflake\_region) | Region of Snowflake account | `string` | `""` | no |
| <a name="input_snowflake_schema"></a> [snowflake\_schema](#input\_snowflake\_schema) | Snowflake schema name | `string` | `""` | no |
| <a name="input_snowflake_streaming_account_url"></a> [snowflake\_streaming\_account\_url](#input\_snowflake\_streaming\_account\_url) | Snowflake account URL to use | `string` | `""` | no |
| <a name="input_snowflake_streaming_database"></a> [snowflake\_streaming\_database](#input\_snowflake\_streaming\_database) | Snowflake database name | `string` | `""` | no |
| <a name="input_snowflake_streaming_enabled"></a> [snowflake\_streaming\_enabled](#input\_snowflake\_streaming\_enabled) | Whether to enable loading into a Snowflake Database with a Streaming Loader | `bool` | `false` | no |
| <a name="input_snowflake_streaming_loader_private_key"></a> [snowflake\_streaming\_loader\_private\_key](#input\_snowflake\_streaming\_loader\_private\_key) | The private key to use for the loader user | `string` | `""` | no |
| <a name="input_snowflake_streaming_loader_user"></a> [snowflake\_streaming\_loader\_user](#input\_snowflake\_streaming\_loader\_user) | The Snowflake user used by Snowflake Streaming Loader | `string` | `""` | no |
| <a name="input_snowflake_streaming_schema"></a> [snowflake\_streaming\_schema](#input\_snowflake\_streaming\_schema) | Snowflake schema name | `string` | `""` | no |
| <a name="input_snowflake_transformer_window_period_min"></a> [snowflake\_transformer\_window\_period\_min](#input\_snowflake\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no |
| <a name="input_snowflake_warehouse"></a> [snowflake\_warehouse](#input\_snowflake\_warehouse) | Snowflake warehouse name | `string` | `""` | no |
| <a name="input_ssl_information"></a> [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer | <pre>object({<br> enabled = bool<br> certificate_arn = string<br> })</pre> | <pre>{<br> "certificate_arn": "",<br> "enabled": false<br>}</pre> | no |
Expand Down
36 changes: 36 additions & 0 deletions terraform/aws/pipeline/default/target_snowflake_streaming.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Snowflake Streaming Loader fed from the enriched stream.
# Instantiated only when var.snowflake_streaming_enabled is true.
module "snowflake_streaming_loader_enriched" {
  source  = "snowplow-devops/snowflake-streaming-loader-ec2/aws"
  version = "0.1.0"

  # Zero or one instance, driven by the feature flag.
  count = var.snowflake_streaming_enabled ? 1 : 0

  accept_limited_use_license = var.accept_limited_use_license

  # Naming and tagging
  name = "${var.prefix}-sf-streaming-enriched"
  tags = var.tags

  # Networking: this variant places the loader in the public subnets.
  vpc_id     = var.vpc_id
  subnet_ids = var.public_subnet_ids

  # SSH access
  ssh_key_name     = aws_key_pair.pipeline.key_name
  ssh_ip_allowlist = var.ssh_ip_allowlist

  iam_permissions_boundary = var.iam_permissions_boundary

  # Streams: input is the enriched stream, bad output goes to the bad_1 stream.
  in_stream_name         = module.enriched_stream.name
  bad_stream_name        = module.bad_1_stream.name
  kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity

  # Snowflake destination and credentials
  snowflake_account_url = var.snowflake_streaming_account_url
  snowflake_loader_user = var.snowflake_streaming_loader_user
  snowflake_private_key = var.snowflake_streaming_loader_private_key
  snowflake_database    = var.snowflake_streaming_database
  snowflake_schema      = var.snowflake_streaming_schema

  # Telemetry
  telemetry_enabled = var.telemetry_enabled
  user_provided_id  = var.user_provided_id

  # Logging
  cloudwatch_logs_enabled        = var.cloudwatch_logs_enabled
  cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
}
11 changes: 11 additions & 0 deletions terraform/aws/pipeline/default/terraform.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,17 @@ snowflake_warehouse = "<WAREHOUSE>"
# This controls how often data will be loaded into Snowflake
snowflake_transformer_window_period_min = 1

# --- Target: Snowflake Streaming
# Follow the guide to get input values for the loader:
# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws

# Set to true to enable loading into Snowflake with the Streaming Loader
snowflake_streaming_enabled = false

# Snowflake account URL to use
snowflake_streaming_account_url = "<ACCOUNT>"
# Snowflake user used by the Streaming Loader, and the private key for that user
snowflake_streaming_loader_user        = "<USER>"
snowflake_streaming_loader_private_key = "<PRIVATE_KEY>"
# Snowflake database and schema names
snowflake_streaming_database = "<DATABASE>"
snowflake_streaming_schema   = "<SCHEMA>"

# --- Target: Databricks
# Follow the guide to get input values for the loader:
# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws
Expand Down
39 changes: 39 additions & 0 deletions terraform/aws/pipeline/default/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,45 @@ variable "snowflake_transformer_window_period_min" {
default = 5
}

# --- Target: SnowflakeDB Streaming

# Feature flag for the Snowflake Streaming Loader; disabled by default.
variable "snowflake_streaming_enabled" {
  type        = bool
  default     = false
  description = "Whether to enable loading into a Snowflake Database with a Streaming Loader"
}

# Account URL for the Snowflake Streaming Loader; defaults to an empty string.
variable "snowflake_streaming_account_url" {
  type        = string
  default     = ""
  description = "Snowflake account URL to use"
}

# Snowflake user for the Streaming Loader; defaults to an empty string.
variable "snowflake_streaming_loader_user" {
  type        = string
  default     = ""
  description = "The Snowflake user used by Snowflake Streaming Loader"
}

# Private key for the Streaming Loader user; defaults to an empty string.
variable "snowflake_streaming_loader_private_key" {
  type        = string
  default     = ""
  description = "The private key to use for the loader user"

  # Marked sensitive so Terraform redacts the value in plan/apply output.
  sensitive = true
}

# Snowflake database for the Streaming Loader; defaults to an empty string.
variable "snowflake_streaming_database" {
  type        = string
  default     = ""
  description = "Snowflake database name"
}

# Snowflake schema for the Streaming Loader; defaults to an empty string.
variable "snowflake_streaming_schema" {
  type        = string
  default     = ""
  description = "Snowflake schema name"
}

# --- Target: Databricks

variable "databricks_enabled" {
Expand Down
7 changes: 7 additions & 0 deletions terraform/aws/pipeline/secure/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
| <a name="module_s3_pipeline_bucket"></a> [s3\_pipeline\_bucket](#module\_s3\_pipeline\_bucket) | snowplow-devops/s3-bucket/aws | 0.2.0 |
| <a name="module_sf_loader"></a> [sf\_loader](#module\_sf\_loader) | snowplow-devops/snowflake-loader-ec2/aws | 0.3.0 |
| <a name="module_sf_transformer_wrj"></a> [sf\_transformer\_wrj](#module\_sf\_transformer\_wrj) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 |
| <a name="module_snowflake_streaming_loader_enriched"></a> [snowflake\_streaming\_loader\_enriched](#module\_snowflake\_streaming\_loader\_enriched) | snowplow-devops/snowflake-streaming-loader-ec2/aws | 0.1.0 |

## Resources

Expand Down Expand Up @@ -97,6 +98,12 @@
| <a name="input_snowflake_loader_user"></a> [snowflake\_loader\_user](#input\_snowflake\_loader\_user) | The Snowflake user used by Snowflake Loader | `string` | `""` | no |
| <a name="input_snowflake_region"></a> [snowflake\_region](#input\_snowflake\_region) | Region of Snowflake account | `string` | `""` | no |
| <a name="input_snowflake_schema"></a> [snowflake\_schema](#input\_snowflake\_schema) | Snowflake schema name | `string` | `""` | no |
| <a name="input_snowflake_streaming_account_url"></a> [snowflake\_streaming\_account\_url](#input\_snowflake\_streaming\_account\_url) | Snowflake account URL to use | `string` | `""` | no |
| <a name="input_snowflake_streaming_database"></a> [snowflake\_streaming\_database](#input\_snowflake\_streaming\_database) | Snowflake database name | `string` | `""` | no |
| <a name="input_snowflake_streaming_enabled"></a> [snowflake\_streaming\_enabled](#input\_snowflake\_streaming\_enabled) | Whether to enable loading into a Snowflake Database with a Streaming Loader | `bool` | `false` | no |
| <a name="input_snowflake_streaming_loader_private_key"></a> [snowflake\_streaming\_loader\_private\_key](#input\_snowflake\_streaming\_loader\_private\_key) | The private key to use for the loader user | `string` | `""` | no |
| <a name="input_snowflake_streaming_loader_user"></a> [snowflake\_streaming\_loader\_user](#input\_snowflake\_streaming\_loader\_user) | The Snowflake user used by Snowflake Streaming Loader | `string` | `""` | no |
| <a name="input_snowflake_streaming_schema"></a> [snowflake\_streaming\_schema](#input\_snowflake\_streaming\_schema) | Snowflake schema name | `string` | `""` | no |
| <a name="input_snowflake_transformer_window_period_min"></a> [snowflake\_transformer\_window\_period\_min](#input\_snowflake\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no |
| <a name="input_snowflake_warehouse"></a> [snowflake\_warehouse](#input\_snowflake\_warehouse) | Snowflake warehouse name | `string` | `""` | no |
| <a name="input_ssl_information"></a> [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer | <pre>object({<br> enabled = bool<br> certificate_arn = string<br> })</pre> | <pre>{<br> "certificate_arn": "",<br> "enabled": false<br>}</pre> | no |
Expand Down
38 changes: 38 additions & 0 deletions terraform/aws/pipeline/secure/target_snowflake_streaming.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Snowflake Streaming Loader fed from the enriched stream.
# Instantiated only when var.snowflake_streaming_enabled is true.
module "snowflake_streaming_loader_enriched" {
  source  = "snowplow-devops/snowflake-streaming-loader-ec2/aws"
  version = "0.1.0"

  # Zero or one instance, driven by the feature flag.
  count = var.snowflake_streaming_enabled ? 1 : 0

  accept_limited_use_license = var.accept_limited_use_license

  # Naming and tagging
  name = "${var.prefix}-sf-streaming-enriched"
  tags = var.tags

  # Networking: this variant places the loader in the private subnets
  # and does not associate a public IP address.
  vpc_id                      = var.vpc_id
  subnet_ids                  = var.private_subnet_ids
  associate_public_ip_address = false

  # SSH access
  ssh_key_name     = aws_key_pair.pipeline.key_name
  ssh_ip_allowlist = var.ssh_ip_allowlist

  iam_permissions_boundary = var.iam_permissions_boundary

  # Streams: input is the enriched stream, bad output goes to the bad_1 stream.
  in_stream_name         = module.enriched_stream.name
  bad_stream_name        = module.bad_1_stream.name
  kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity

  # Snowflake destination and credentials
  snowflake_account_url = var.snowflake_streaming_account_url
  snowflake_loader_user = var.snowflake_streaming_loader_user
  snowflake_private_key = var.snowflake_streaming_loader_private_key
  snowflake_database    = var.snowflake_streaming_database
  snowflake_schema      = var.snowflake_streaming_schema

  # Telemetry
  telemetry_enabled = var.telemetry_enabled
  user_provided_id  = var.user_provided_id

  # Logging
  cloudwatch_logs_enabled        = var.cloudwatch_logs_enabled
  cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
}
11 changes: 11 additions & 0 deletions terraform/aws/pipeline/secure/terraform.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,17 @@ snowflake_warehouse = "<WAREHOUSE>"
# This controls how often data will be loaded into Snowflake
snowflake_transformer_window_period_min = 1

# --- Target: Snowflake Streaming
# Follow the guide to get input values for the loader:
# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws

# Set to true to enable loading into Snowflake with the Streaming Loader
snowflake_streaming_enabled = false

# Snowflake account URL to use
snowflake_streaming_account_url = "<ACCOUNT>"
# Snowflake user used by the Streaming Loader, and the private key for that user
snowflake_streaming_loader_user        = "<USER>"
snowflake_streaming_loader_private_key = "<PRIVATE_KEY>"
# Snowflake database and schema names
snowflake_streaming_database = "<DATABASE>"
snowflake_streaming_schema   = "<SCHEMA>"

# --- Target: Databricks
# Follow the guide to get input values for the loader:
# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws
Expand Down
39 changes: 39 additions & 0 deletions terraform/aws/pipeline/secure/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,45 @@ variable "snowflake_transformer_window_period_min" {
default = 5
}

# --- Target: SnowflakeDB Streaming

# Feature flag for the Snowflake Streaming Loader; disabled by default.
variable "snowflake_streaming_enabled" {
  type        = bool
  default     = false
  description = "Whether to enable loading into a Snowflake Database with a Streaming Loader"
}

# Account URL for the Snowflake Streaming Loader; defaults to an empty string.
variable "snowflake_streaming_account_url" {
  type        = string
  default     = ""
  description = "Snowflake account URL to use"
}

# Snowflake user for the Streaming Loader; defaults to an empty string.
variable "snowflake_streaming_loader_user" {
  type        = string
  default     = ""
  description = "The Snowflake user used by Snowflake Streaming Loader"
}

# Private key for the Streaming Loader user; defaults to an empty string.
variable "snowflake_streaming_loader_private_key" {
  type        = string
  default     = ""
  description = "The private key to use for the loader user"

  # Marked sensitive so Terraform redacts the value in plan/apply output.
  sensitive = true
}

# Snowflake database for the Streaming Loader; defaults to an empty string.
variable "snowflake_streaming_database" {
  type        = string
  default     = ""
  description = "Snowflake database name"
}

# Snowflake schema for the Streaming Loader; defaults to an empty string.
variable "snowflake_streaming_schema" {
  type        = string
  default     = ""
  description = "Snowflake schema name"
}

# --- Target: Databricks

variable "databricks_enabled" {
Expand Down
Loading