From b4a0c9a52f7e88ac4841ab41c8ec852bcbc2e585 Mon Sep 17 00:00:00 2001 From: jbeemster Date: Tue, 12 Mar 2024 21:39:41 +0100 Subject: [PATCH] Add support for Snowflake Streaming Loader to AWS (closes #92) --- terraform/aws/pipeline/default/README.md | 7 ++++ .../default/target_snowflake_streaming.tf | 36 +++++++++++++++++ .../aws/pipeline/default/terraform.tfvars | 11 ++++++ terraform/aws/pipeline/default/variables.tf | 39 +++++++++++++++++++ terraform/aws/pipeline/secure/README.md | 7 ++++ .../secure/target_snowflake_streaming.tf | 38 ++++++++++++++++++ .../aws/pipeline/secure/terraform.tfvars | 11 ++++++ terraform/aws/pipeline/secure/variables.tf | 39 +++++++++++++++++++ 8 files changed, 188 insertions(+) create mode 100644 terraform/aws/pipeline/default/target_snowflake_streaming.tf create mode 100644 terraform/aws/pipeline/secure/target_snowflake_streaming.tf diff --git a/terraform/aws/pipeline/default/README.md b/terraform/aws/pipeline/default/README.md index 694f17e..95d2148 100644 --- a/terraform/aws/pipeline/default/README.md +++ b/terraform/aws/pipeline/default/README.md @@ -36,6 +36,7 @@ | [s3\_pipeline\_bucket](#module\_s3\_pipeline\_bucket) | snowplow-devops/s3-bucket/aws | 0.2.0 | | [sf\_loader](#module\_sf\_loader) | snowplow-devops/snowflake-loader-ec2/aws | 0.3.0 | | [sf\_transformer\_wrj](#module\_sf\_transformer\_wrj) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 | +| [snowflake\_streaming\_loader\_enriched](#module\_snowflake\_streaming\_loader\_enriched) | snowplow-devops/snowflake-streaming-loader-ec2/aws | 0.1.0 | ## Resources @@ -97,6 +98,12 @@ | [snowflake\_loader\_user](#input\_snowflake\_loader\_user) | The Snowflake user used by Snowflake Loader | `string` | `""` | no | | [snowflake\_region](#input\_snowflake\_region) | Region of Snowflake account | `string` | `""` | no | | [snowflake\_schema](#input\_snowflake\_schema) | Snowflake schema name | `string` | `""` | no | +| [snowflake\_streaming\_account\_url](#input\_snowflake\_streaming\_account\_url) | Snowflake account URL to use | `string` | `""` | no | +| [snowflake\_streaming\_database](#input\_snowflake\_streaming\_database) | Snowflake database name | `string` | `""` | no | +| [snowflake\_streaming\_enabled](#input\_snowflake\_streaming\_enabled) | Whether to enable loading into a Snowflake Database with a Streaming Loader | `bool` | `false` | no | +| [snowflake\_streaming\_loader\_private\_key](#input\_snowflake\_streaming\_loader\_private\_key) | The private key to use for the loader user | `string` | `""` | no | +| [snowflake\_streaming\_loader\_user](#input\_snowflake\_streaming\_loader\_user) | The Snowflake user used by Snowflake Streaming Loader | `string` | `""` | no | +| [snowflake\_streaming\_schema](#input\_snowflake\_streaming\_schema) | Snowflake schema name | `string` | `""` | no | | [snowflake\_transformer\_window\_period\_min](#input\_snowflake\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no | | [snowflake\_warehouse](#input\_snowflake\_warehouse) | Snowflake warehouse name | `string` | `""` | no | | [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer |
object({
enabled = bool
certificate_arn = string
})
|
{
"certificate_arn": "",
"enabled": false
}
| no | diff --git a/terraform/aws/pipeline/default/target_snowflake_streaming.tf b/terraform/aws/pipeline/default/target_snowflake_streaming.tf new file mode 100644 index 0000000..26f677d --- /dev/null +++ b/terraform/aws/pipeline/default/target_snowflake_streaming.tf @@ -0,0 +1,36 @@ +module "snowflake_streaming_loader_enriched" { + source = "snowplow-devops/snowflake-streaming-loader-ec2/aws" + version = "0.1.0" + + accept_limited_use_license = var.accept_limited_use_license + + count = var.snowflake_streaming_enabled ? 1 : 0 + + name = "${var.prefix}-sf-streaming-enriched" + vpc_id = var.vpc_id + subnet_ids = var.public_subnet_ids + + in_stream_name = module.enriched_stream.name + bad_stream_name = module.bad_1_stream.name + + snowflake_account_url = var.snowflake_streaming_account_url + snowflake_loader_user = var.snowflake_streaming_loader_user + snowflake_private_key = var.snowflake_streaming_loader_private_key + snowflake_database = var.snowflake_streaming_database + snowflake_schema = var.snowflake_streaming_schema + + ssh_key_name = aws_key_pair.pipeline.key_name + ssh_ip_allowlist = var.ssh_ip_allowlist + + iam_permissions_boundary = var.iam_permissions_boundary + + telemetry_enabled = var.telemetry_enabled + user_provided_id = var.user_provided_id + + kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity + + tags = var.tags + + cloudwatch_logs_enabled = var.cloudwatch_logs_enabled + cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days +} diff --git a/terraform/aws/pipeline/default/terraform.tfvars b/terraform/aws/pipeline/default/terraform.tfvars index 1c48703..7f5506b 100644 --- a/terraform/aws/pipeline/default/terraform.tfvars +++ b/terraform/aws/pipeline/default/terraform.tfvars @@ -72,6 +72,17 @@ snowflake_warehouse = "" # This controls how often data will be loading into Snowflake snowflake_transformer_window_period_min = 1 +# --- Target: Snowflake Streaming +# Follow the guide to get input values for the loader: +# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws +snowflake_streaming_enabled = false + +snowflake_streaming_account_url = "" +snowflake_streaming_loader_user = "" +snowflake_streaming_loader_private_key = "" +snowflake_streaming_database = "" +snowflake_streaming_schema = "" + # --- Target: Databricks # Follow the guide to get input values for the loader: # https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws diff --git a/terraform/aws/pipeline/default/variables.tf b/terraform/aws/pipeline/default/variables.tf index 397caab..1f5e675 100644 --- a/terraform/aws/pipeline/default/variables.tf +++ b/terraform/aws/pipeline/default/variables.tf @@ -237,6 +237,45 @@ variable "snowflake_transformer_window_period_min" { default = 5 } +# --- Target: SnowflakeDB Streaming + +variable "snowflake_streaming_enabled" { + description = "Whether to enable loading into a Snowflake Database with a Streaming Loader" + default = false + type = bool +} + +variable "snowflake_streaming_account_url" { + description = "Snowflake account URL to use" + type = string + default = "" +} + +variable "snowflake_streaming_loader_user" { + description = "The Snowflake user used by Snowflake Streaming Loader" + type = string + default = "" +} + +variable "snowflake_streaming_loader_private_key" { + description = "The private key to use for the loader user" + type = string + sensitive = true + default = "" +} + +variable "snowflake_streaming_database" { + description = "Snowflake database name" + type = string + default = "" +} + +variable "snowflake_streaming_schema" { + description = "Snowflake schema name" + type = string + default = "" +} + # --- Target: Databricks variable "databricks_enabled" { diff --git a/terraform/aws/pipeline/secure/README.md b/terraform/aws/pipeline/secure/README.md index 5773ec3..deaa851 100644 --- a/terraform/aws/pipeline/secure/README.md +++ b/terraform/aws/pipeline/secure/README.md @@ -36,6 +36,7 @@ | [s3\_pipeline\_bucket](#module\_s3\_pipeline\_bucket) | snowplow-devops/s3-bucket/aws | 0.2.0 | | [sf\_loader](#module\_sf\_loader) | snowplow-devops/snowflake-loader-ec2/aws | 0.3.0 | | [sf\_transformer\_wrj](#module\_sf\_transformer\_wrj) | snowplow-devops/transformer-kinesis-ec2/aws | 0.4.0 | +| [snowflake\_streaming\_loader\_enriched](#module\_snowflake\_streaming\_loader\_enriched) | snowplow-devops/snowflake-streaming-loader-ec2/aws | 0.1.0 | ## Resources @@ -97,6 +98,12 @@ | [snowflake\_loader\_user](#input\_snowflake\_loader\_user) | The Snowflake user used by Snowflake Loader | `string` | `""` | no | | [snowflake\_region](#input\_snowflake\_region) | Region of Snowflake account | `string` | `""` | no | | [snowflake\_schema](#input\_snowflake\_schema) | Snowflake schema name | `string` | `""` | no | +| [snowflake\_streaming\_account\_url](#input\_snowflake\_streaming\_account\_url) | Snowflake account URL to use | `string` | `""` | no | +| [snowflake\_streaming\_database](#input\_snowflake\_streaming\_database) | Snowflake database name | `string` | `""` | no | +| [snowflake\_streaming\_enabled](#input\_snowflake\_streaming\_enabled) | Whether to enable loading into a Snowflake Database with a Streaming Loader | `bool` | `false` | no | +| [snowflake\_streaming\_loader\_private\_key](#input\_snowflake\_streaming\_loader\_private\_key) | The private key to use for the loader user | `string` | `""` | no | +| [snowflake\_streaming\_loader\_user](#input\_snowflake\_streaming\_loader\_user) | The Snowflake user used by Snowflake Streaming Loader | `string` | `""` | no | +| [snowflake\_streaming\_schema](#input\_snowflake\_streaming\_schema) | Snowflake schema name | `string` | `""` | no | | [snowflake\_transformer\_window\_period\_min](#input\_snowflake\_transformer\_window\_period\_min) | Frequency to emit transforming finished message - 5,10,15,20,30,60 etc minutes | `number` | `5` | no | | [snowflake\_warehouse](#input\_snowflake\_warehouse) | Snowflake warehouse name | `string` | `""` | no | | [ssl\_information](#input\_ssl\_information) | The ARN of an Amazon Certificate Manager certificate to bind to the load balancer |
object({
enabled = bool
certificate_arn = string
})
|
{
"certificate_arn": "",
"enabled": false
}
| no | diff --git a/terraform/aws/pipeline/secure/target_snowflake_streaming.tf b/terraform/aws/pipeline/secure/target_snowflake_streaming.tf new file mode 100644 index 0000000..858d856 --- /dev/null +++ b/terraform/aws/pipeline/secure/target_snowflake_streaming.tf @@ -0,0 +1,38 @@ +module "snowflake_streaming_loader_enriched" { + source = "snowplow-devops/snowflake-streaming-loader-ec2/aws" + version = "0.1.0" + + accept_limited_use_license = var.accept_limited_use_license + + count = var.snowflake_streaming_enabled ? 1 : 0 + + name = "${var.prefix}-sf-streaming-enriched" + vpc_id = var.vpc_id + subnet_ids = var.private_subnet_ids + + in_stream_name = module.enriched_stream.name + bad_stream_name = module.bad_1_stream.name + + snowflake_account_url = var.snowflake_streaming_account_url + snowflake_loader_user = var.snowflake_streaming_loader_user + snowflake_private_key = var.snowflake_streaming_loader_private_key + snowflake_database = var.snowflake_streaming_database + snowflake_schema = var.snowflake_streaming_schema + + ssh_key_name = aws_key_pair.pipeline.key_name + ssh_ip_allowlist = var.ssh_ip_allowlist + + iam_permissions_boundary = var.iam_permissions_boundary + + telemetry_enabled = var.telemetry_enabled + user_provided_id = var.user_provided_id + + kcl_write_max_capacity = var.pipeline_kcl_write_max_capacity + + associate_public_ip_address = false + + tags = var.tags + + cloudwatch_logs_enabled = var.cloudwatch_logs_enabled + cloudwatch_logs_retention_days = var.cloudwatch_logs_retention_days +} diff --git a/terraform/aws/pipeline/secure/terraform.tfvars b/terraform/aws/pipeline/secure/terraform.tfvars index 96e1a31..b48e94f 100644 --- a/terraform/aws/pipeline/secure/terraform.tfvars +++ b/terraform/aws/pipeline/secure/terraform.tfvars @@ -74,6 +74,17 @@ snowflake_warehouse = "" # This controls how often data will be loading into Snowflake snowflake_transformer_window_period_min = 1 +# --- Target: Snowflake Streaming +# Follow the guide to get input values for the loader: +# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws +snowflake_streaming_enabled = false + +snowflake_streaming_account_url = "" +snowflake_streaming_loader_user = "" +snowflake_streaming_loader_private_key = "" +snowflake_streaming_database = "" +snowflake_streaming_schema = "" + # --- Target: Databricks # Follow the guide to get input values for the loader: # https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start-aws diff --git a/terraform/aws/pipeline/secure/variables.tf b/terraform/aws/pipeline/secure/variables.tf index 9387b97..912af35 100644 --- a/terraform/aws/pipeline/secure/variables.tf +++ b/terraform/aws/pipeline/secure/variables.tf @@ -236,6 +236,45 @@ variable "snowflake_transformer_window_period_min" { default = 5 } +# --- Target: SnowflakeDB Streaming + +variable "snowflake_streaming_enabled" { + description = "Whether to enable loading into a Snowflake Database with a Streaming Loader" + default = false + type = bool +} + +variable "snowflake_streaming_account_url" { + description = "Snowflake account URL to use" + type = string + default = "" +} + +variable "snowflake_streaming_loader_user" { + description = "The Snowflake user used by Snowflake Streaming Loader" + type = string + default = "" +} + +variable "snowflake_streaming_loader_private_key" { + description = "The private key to use for the loader user" + type = string + sensitive = true + default = "" +} + +variable "snowflake_streaming_database" { + description = "Snowflake database name" + type = string + default = "" +} + +variable "snowflake_streaming_schema" { + description = "Snowflake schema name" + type = string + default = "" +} + # --- Target: Databricks variable "databricks_enabled" {