Skip to content

Commit

Permalink
Add support for Snowflake Streaming Loader to AWS (closes #92)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbeemster committed Apr 3, 2024
1 parent 0461951 commit 49469f3
Show file tree
Hide file tree
Showing 8 changed files with 188 additions and 0 deletions.
7 changes: 7 additions & 0 deletions terraform/aws/pipeline/default/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
| <a name="module_s3_pipeline_bucket"></a> [s3\_pipeline\_bucket](#module\_s3\_pipeline\_bucket) | snowplow-devops/s3-bucket/aws | 0.2.0 |
| <a name="module_sf_loader"></a> [sf\_loader](#module\_sf\_loader) | snowplow-devops/snowflake-loader-ec2/aws | 0.3.0 |
| <a name="module_sf_transformer_wrj"></a> [sf\_transformer\_wrj](#module\_sf\_transformer\_wrj) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 |
| <a name="module_snowflake_streaming_loader_enriched"></a> [snowflake\_streaming\_loader\_enriched](#module\_snowflake\_streaming\_loader\_enriched) | snowplow-devops/snowflake-streaming-loader-ec2/aws | 0.1.0 |

## Resources

Expand Down Expand Up @@ -97,6 +98,12 @@
| <a name="input_snowflake_loader_user"></a> [snowflake\_loader\_user](#input\_snowflake\_loader\_user) | The Snowflake user used by Snowflake Loader | `string` | `""` | no |
| <a name="input_snowflake_region"></a> [snowflake\_region](#input\_snowflake\_region) | Region of Snowflake account | `string` | `""` | no |
| <a name="input_snowflake_schema"></a> [snowflake\_schema](#input\_snowflake\_schema) | Snowflake schema name | `string` | `""` | no |
| <a name="input_snowflake_streaming_account_url"></a> [snowflake\_streaming\_account\_url](#input\_snowflake\_streaming\_account\_url) | Snowflake account URL to use | `string` | `""` | no |
| <a name="input_snowflake_streaming_database"></a> [snowflake\_streaming\_database](#input\_snowflake\_streaming\_database) | Snowflake database name | `string` | `""` | no |
| <a name="input_snowflake_streaming_enabled"></a> [snowflake\_streaming\_enabled](#input\_snowflake\_streaming\_enabled) | Whether to enable loading into a Snowflake Database with a Streaming Loader | `bool` | `false` | no |
| <a name="input_snowflake_streaming_loader_private_key"></a> [snowflake\_streaming\_loader\_private\_key](#input\_snowflake\_streaming\_loader\_private\_key) | The private key to use for the loader user | `string` | `""` | no |
| <a name="input_snowflake_streaming_loader_user"></a> [snowflake\_streaming\_loader\_user](#input\_snowflake\_streaming\_loader\_user) | The Snowflake user used by Snowflake Streaming Loader | `string` | `""` | no |
| <a name="input_snowflake_streaming_schema"></a> [snowflake\_streaming\_schema](#input\_snowflake\_streaming\_schema) | Snowflake schema name | `string` | `""` | no |
| <a name="input_snowflake_transformer_window_period_min"></a> [snowflake\_transformer\_window\_period\_min](#input\_snowflake\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no |
| <a name="input_snowflake_warehouse"></a> [snowflake\_warehouse](#input\_snowflake\_warehouse) | Snowflake warehouse name | `string` | `""` | no |
| <a name="input_ssl_information"></a> [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer | <pre>object({<br> enabled = bool<br> certificate_arn = string<br> })</pre> | <pre>{<br> "certificate_arn": "",<br> "enabled": false<br>}</pre> | no |
Expand Down
36 changes: 36 additions & 0 deletions terraform/aws/pipeline/default/target_snowflake_streaming.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Snowflake Streaming Loader attached to the enriched stream.
# Instantiated only when var.snowflake_streaming_enabled is true (count is 1, otherwise 0).
module "snowflake_streaming_loader_enriched" {
  source  = "snowplow-devops/snowflake-streaming-loader-ec2/aws"
  version = "0.1.0"

  count = var.snowflake_streaming_enabled ? 1 : 0

  name = "${var.prefix}-sf-streaming-enriched"

  accept_limited_use_license = var.accept_limited_use_license

  # Networking and SSH access (this variant is placed in the public subnets)
  vpc_id           = var.vpc_id
  subnet_ids       = var.public_subnet_ids
  ssh_key_name     = aws_key_pair.pipeline.key_name
  ssh_ip_allowlist = var.ssh_ip_allowlist

  # Streams: names are taken from the enriched and bad_1 stream modules
  in_stream_name         = module.enriched_stream.name
  bad_stream_name        = module.bad_1_stream.name
  kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity

  # Snowflake destination and credentials (snowflake_streaming_* inputs)
  snowflake_account_url = var.snowflake_streaming_account_url
  snowflake_loader_user = var.snowflake_streaming_loader_user
  snowflake_private_key = var.snowflake_streaming_loader_private_key
  snowflake_database    = var.snowflake_streaming_database
  snowflake_schema      = var.snowflake_streaming_schema

  # IAM and tagging
  iam_permissions_boundary = var.iam_permissions_boundary
  tags                     = var.tags

  # Telemetry
  telemetry_enabled = var.telemetry_enabled
  user_provided_id  = var.user_provided_id

  # CloudWatch logging
  cloudwatch_logs_enabled        = var.cloudwatch_logs_enabled
  cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
}
11 changes: 11 additions & 0 deletions terraform/aws/pipeline/default/terraform.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,17 @@ snowflake_warehouse = "<WAREHOUSE>"
# This controls how often data is loaded into Snowflake
snowflake_transformer_window_period_min = 1

# --- Target: Snowflake Streaming
# Follow the guide to get input values for the loader:
# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws
snowflake_streaming_enabled = false

# Replace every <PLACEHOLDER> below before setting snowflake_streaming_enabled = true.
# Note that snowflake_streaming_account_url expects the account URL, not just the account name.
snowflake_streaming_account_url        = "<ACCOUNT_URL>"
snowflake_streaming_loader_user        = "<USER>"
snowflake_streaming_loader_private_key = "<PRIVATE_KEY>"
snowflake_streaming_database           = "<DATABASE>"
snowflake_streaming_schema             = "<SCHEMA>"

# --- Target: Databricks
# Follow the guide to get input values for the loader:
# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws
Expand Down
39 changes: 39 additions & 0 deletions terraform/aws/pipeline/default/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,45 @@ variable "snowflake_transformer_window_period_min" {
default = 5
}

# --- Target: SnowflakeDB Streaming

# Feature flag for the Snowflake Streaming Loader target; disabled unless explicitly set to true.
variable "snowflake_streaming_enabled" {
  type        = bool
  default     = false
  description = "Whether to enable loading into a Snowflake Database with a Streaming Loader"
}

# Account URL for the streaming loader; defaults to an empty string.
variable "snowflake_streaming_account_url" {
  type        = string
  default     = ""
  description = "Snowflake account URL to use"
}

# Snowflake user for the streaming loader; defaults to an empty string.
variable "snowflake_streaming_loader_user" {
  type        = string
  default     = ""
  description = "The Snowflake user used by Snowflake Streaming Loader"
}

# Private key for the loader user; declared sensitive so Terraform redacts it in CLI output.
variable "snowflake_streaming_loader_private_key" {
  type        = string
  default     = ""
  sensitive   = true
  description = "The private key to use for the loader user"
}

# Target database for the streaming loader; defaults to an empty string.
variable "snowflake_streaming_database" {
  type        = string
  default     = ""
  description = "Snowflake database name"
}

# Target schema for the streaming loader; defaults to an empty string.
variable "snowflake_streaming_schema" {
  type        = string
  default     = ""
  description = "Snowflake schema name"
}

# --- Target: Databricks

variable "databricks_enabled" {
Expand Down
7 changes: 7 additions & 0 deletions terraform/aws/pipeline/secure/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
| <a name="module_s3_pipeline_bucket"></a> [s3\_pipeline\_bucket](#module\_s3\_pipeline\_bucket) | snowplow-devops/s3-bucket/aws | 0.2.0 |
| <a name="module_sf_loader"></a> [sf\_loader](#module\_sf\_loader) | snowplow-devops/snowflake-loader-ec2/aws | 0.3.0 |
| <a name="module_sf_transformer_wrj"></a> [sf\_transformer\_wrj](#module\_sf\_transformer\_wrj) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 |
| <a name="module_snowflake_streaming_loader_enriched"></a> [snowflake\_streaming\_loader\_enriched](#module\_snowflake\_streaming\_loader\_enriched) | snowplow-devops/snowflake-streaming-loader-ec2/aws | 0.1.0 |

## Resources

Expand Down Expand Up @@ -97,6 +98,12 @@
| <a name="input_snowflake_loader_user"></a> [snowflake\_loader\_user](#input\_snowflake\_loader\_user) | The Snowflake user used by Snowflake Loader | `string` | `""` | no |
| <a name="input_snowflake_region"></a> [snowflake\_region](#input\_snowflake\_region) | Region of Snowflake account | `string` | `""` | no |
| <a name="input_snowflake_schema"></a> [snowflake\_schema](#input\_snowflake\_schema) | Snowflake schema name | `string` | `""` | no |
| <a name="input_snowflake_streaming_account_url"></a> [snowflake\_streaming\_account\_url](#input\_snowflake\_streaming\_account\_url) | Snowflake account URL to use | `string` | `""` | no |
| <a name="input_snowflake_streaming_database"></a> [snowflake\_streaming\_database](#input\_snowflake\_streaming\_database) | Snowflake database name | `string` | `""` | no |
| <a name="input_snowflake_streaming_enabled"></a> [snowflake\_streaming\_enabled](#input\_snowflake\_streaming\_enabled) | Whether to enable loading into a Snowflake Database with a Streaming Loader | `bool` | `false` | no |
| <a name="input_snowflake_streaming_loader_private_key"></a> [snowflake\_streaming\_loader\_private\_key](#input\_snowflake\_streaming\_loader\_private\_key) | The private key to use for the loader user | `string` | `""` | no |
| <a name="input_snowflake_streaming_loader_user"></a> [snowflake\_streaming\_loader\_user](#input\_snowflake\_streaming\_loader\_user) | The Snowflake user used by Snowflake Streaming Loader | `string` | `""` | no |
| <a name="input_snowflake_streaming_schema"></a> [snowflake\_streaming\_schema](#input\_snowflake\_streaming\_schema) | Snowflake schema name | `string` | `""` | no |
| <a name="input_snowflake_transformer_window_period_min"></a> [snowflake\_transformer\_window\_period\_min](#input\_snowflake\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no |
| <a name="input_snowflake_warehouse"></a> [snowflake\_warehouse](#input\_snowflake\_warehouse) | Snowflake warehouse name | `string` | `""` | no |
| <a name="input_ssl_information"></a> [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer | <pre>object({<br> enabled = bool<br> certificate_arn = string<br> })</pre> | <pre>{<br> "certificate_arn": "",<br> "enabled": false<br>}</pre> | no |
Expand Down
38 changes: 38 additions & 0 deletions terraform/aws/pipeline/secure/target_snowflake_streaming.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Snowflake Streaming Loader attached to the enriched stream (secure variant).
# Instantiated only when var.snowflake_streaming_enabled is true (count is 1, otherwise 0).
module "snowflake_streaming_loader_enriched" {
  source  = "snowplow-devops/snowflake-streaming-loader-ec2/aws"
  version = "0.1.0"

  count = var.snowflake_streaming_enabled ? 1 : 0

  name = "${var.prefix}-sf-streaming-enriched"

  accept_limited_use_license = var.accept_limited_use_license

  # Networking and SSH access: private subnets, no public IP association
  vpc_id                      = var.vpc_id
  subnet_ids                  = var.private_subnet_ids
  associate_public_ip_address = false
  ssh_key_name                = aws_key_pair.pipeline.key_name
  ssh_ip_allowlist            = var.ssh_ip_allowlist

  # Streams: names are taken from the enriched and bad_1 stream modules
  in_stream_name         = module.enriched_stream.name
  bad_stream_name        = module.bad_1_stream.name
  kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity

  # Snowflake destination and credentials (snowflake_streaming_* inputs)
  snowflake_account_url = var.snowflake_streaming_account_url
  snowflake_loader_user = var.snowflake_streaming_loader_user
  snowflake_private_key = var.snowflake_streaming_loader_private_key
  snowflake_database    = var.snowflake_streaming_database
  snowflake_schema      = var.snowflake_streaming_schema

  # IAM and tagging
  iam_permissions_boundary = var.iam_permissions_boundary
  tags                     = var.tags

  # Telemetry
  telemetry_enabled = var.telemetry_enabled
  user_provided_id  = var.user_provided_id

  # CloudWatch logging
  cloudwatch_logs_enabled        = var.cloudwatch_logs_enabled
  cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days
}
11 changes: 11 additions & 0 deletions terraform/aws/pipeline/secure/terraform.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,17 @@ snowflake_warehouse = "<WAREHOUSE>"
# This controls how often data is loaded into Snowflake
snowflake_transformer_window_period_min = 1

# --- Target: Snowflake Streaming
# Follow the guide to get input values for the loader:
# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws
snowflake_streaming_enabled = false

# Replace every <PLACEHOLDER> below before setting snowflake_streaming_enabled = true.
# Note that snowflake_streaming_account_url expects the account URL, not just the account name.
snowflake_streaming_account_url        = "<ACCOUNT_URL>"
snowflake_streaming_loader_user        = "<USER>"
snowflake_streaming_loader_private_key = "<PRIVATE_KEY>"
snowflake_streaming_database           = "<DATABASE>"
snowflake_streaming_schema             = "<SCHEMA>"

# --- Target: Databricks
# Follow the guide to get input values for the loader:
# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws
Expand Down
39 changes: 39 additions & 0 deletions terraform/aws/pipeline/secure/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,45 @@ variable "snowflake_transformer_window_period_min" {
default = 5
}

# --- Target: SnowflakeDB Streaming

# Feature flag for the Snowflake Streaming Loader target; disabled unless explicitly set to true.
variable "snowflake_streaming_enabled" {
  type        = bool
  default     = false
  description = "Whether to enable loading into a Snowflake Database with a Streaming Loader"
}

# Account URL for the streaming loader; defaults to an empty string.
variable "snowflake_streaming_account_url" {
  type        = string
  default     = ""
  description = "Snowflake account URL to use"
}

# Snowflake user for the streaming loader; defaults to an empty string.
variable "snowflake_streaming_loader_user" {
  type        = string
  default     = ""
  description = "The Snowflake user used by Snowflake Streaming Loader"
}

# Private key for the loader user; declared sensitive so Terraform redacts it in CLI output.
variable "snowflake_streaming_loader_private_key" {
  type        = string
  default     = ""
  sensitive   = true
  description = "The private key to use for the loader user"
}

# Target database for the streaming loader; defaults to an empty string.
variable "snowflake_streaming_database" {
  type        = string
  default     = ""
  description = "Snowflake database name"
}

# Target schema for the streaming loader; defaults to an empty string.
variable "snowflake_streaming_schema" {
  type        = string
  default     = ""
  description = "Snowflake schema name"
}

# --- Target: Databricks

variable "databricks_enabled" {
Expand Down

0 comments on commit 49469f3

Please sign in to comment.