Skip to content

Commit

Permalink
Accept user input for version and config (close #18)
Browse files Browse the repository at this point in the history
  • Loading branch information
oguzhanunlu committed Dec 11, 2023
1 parent 69b60ad commit 732ea5a
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 99 deletions.
19 changes: 4 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,12 @@ module "transformer_kinesis" {
source = "snowplow-devops/transformer-kinesis-ec2/aws"
name = var.name
app_version = "5.8.0"
vpc_id = var.vpc_id
subnet_ids = var.subnet_ids
stream_name = module.enriched_stream.name
config_b64 = "<base64_encoded_config>"
s3_bucket_name = var.transformed_bucket
s3_bucket_object_prefix = "transformed/good"
# window_period_min is no longer a module input; set the window period inside the configuration passed via config_b64
Expand Down Expand Up @@ -111,40 +113,27 @@ module "transformer_kinesis" {
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_name"></a> [name](#input\_name) | A name which will be pre-pended to the resources created | `string` | n/a | yes |
| <a name="input_app_version"></a> [app\_version](#input\_app\_version) | Version of transformer-kinesis | `string` | `"5.8.0"` | no |
| <a name="input_s3_bucket_name"></a> [s3\_bucket\_name](#input\_s3\_bucket\_name) | The name of the S3 bucket events will be loaded into | `string` | n/a | yes |
| <a name="input_s3_bucket_object_prefix"></a> [s3\_bucket\_object\_prefix](#input\_s3\_bucket\_object\_prefix) | An optional prefix under which Snowplow data will be saved | `string` | n/a | yes |
| <a name="input_ssh_key_name"></a> [ssh\_key\_name](#input\_ssh\_key\_name) | The name of the SSH key-pair to attach to all EC2 nodes deployed | `string` | n/a | yes |
| <a name="input_stream_name"></a> [stream\_name](#input\_stream\_name) | The name of the input kinesis stream that the Transformer will pull data from | `string` | n/a | yes |
| <a name="input_subnet_ids"></a> [subnet\_ids](#input\_subnet\_ids) | The list of subnets to deploy Transformer across | `list(string)` | n/a | yes |
| <a name="input_vpc_id"></a> [vpc\_id](#input\_vpc\_id) | The VPC to deploy Transformer within | `string` | n/a | yes |
| <a name="input_window_period_min"></a> [window\_period\_min](#input\_window\_period\_min) | Frequency to emit loading finished message - 5,10,15,20,30,60 etc minutes | `number` | n/a | yes |
| <a name="input_amazon_linux_2_ami_id"></a> [amazon\_linux\_2\_ami\_id](#input\_amazon\_linux\_2\_ami\_id) | The AMI ID to use which must be based off of Amazon Linux 2; by default the latest community version is used | `string` | `""` | no |
| <a name="input_associate_public_ip_address"></a> [associate\_public\_ip\_address](#input\_associate\_public\_ip\_address) | Whether to assign a public ip address to this instance | `bool` | `true` | no |
| <a name="input_cloudwatch_logs_enabled"></a> [cloudwatch\_logs\_enabled](#input\_cloudwatch\_logs\_enabled) | Whether application logs should be reported to CloudWatch | `bool` | `true` | no |
| <a name="input_cloudwatch_logs_retention_days"></a> [cloudwatch\_logs\_retention\_days](#input\_cloudwatch\_logs\_retention\_days) | The length of time in days to retain logs for | `number` | `7` | no |
| <a name="input_custom_iglu_resolvers"></a> [custom\_iglu\_resolvers](#input\_custom\_iglu\_resolvers) | The custom Iglu Resolvers that will be used by Transformer | <pre>list(object({<br> name = string<br> priority = number<br> uri = string<br> api_key = string<br> vendor_prefixes = list(string)<br> }))</pre> | `[]` | no |
| <a name="input_default_iglu_resolvers"></a> [default\_iglu\_resolvers](#input\_default\_iglu\_resolvers) | The default Iglu Resolvers that will be used by Transformer | <pre>list(object({<br> name = string<br> priority = number<br> uri = string<br> api_key = string<br> vendor_prefixes = list(string)<br> }))</pre> | <pre>[<br> {<br> "api_key": "",<br> "name": "Iglu Central",<br> "priority": 10,<br> "uri": "http://iglucentral.com",<br> "vendor_prefixes": []<br> },<br> {<br> "api_key": "",<br> "name": "Iglu Central - Mirror 01",<br> "priority": 20,<br> "uri": "http://mirror01.iglucentral.com",<br> "vendor_prefixes": []<br> }<br>]</pre> | no |
| <a name="input_default_shred_format"></a> [default\_shred\_format](#input\_default\_shred\_format) | Format used by default when format type is 'shred' (TSV or JSON) | `string` | `"TSV"` | no |
| <a name="input_iam_permissions_boundary"></a> [iam\_permissions\_boundary](#input\_iam\_permissions\_boundary) | The permissions boundary ARN to set on IAM roles created | `string` | `""` | no |
| <a name="input_initial_position"></a> [initial\_position](#input\_initial\_position) | Where to start processing the input Kinesis Stream from (TRIM\_HORIZON or LATEST) | `string` | `"TRIM_HORIZON"` | no |
| <a name="input_instance_type"></a> [instance\_type](#input\_instance\_type) | The instance type to use | `string` | `"t3a.small"` | no |
| <a name="input_java_opts"></a> [java\_opts](#input\_java\_opts) | Custom JAVA Options | `string` | `"-Dorg.slf4j.simpleLogger.defaultLogLevel=info -XX:MinRAMPercentage=50 -XX:MaxRAMPercentage=75"` | no |
| <a name="input_config_b64"></a> [config\_b64](#input\_config\_b64) | Base64 encoded configuration | `string` | n/a | yes |
| <a name="input_kcl_read_max_capacity"></a> [kcl\_read\_max\_capacity](#input\_kcl\_read\_max\_capacity) | The maximum READ capacity for the KCL DynamoDB table | `number` | `10` | no |
| <a name="input_kcl_read_min_capacity"></a> [kcl\_read\_min\_capacity](#input\_kcl\_read\_min\_capacity) | The minimum READ capacity for the KCL DynamoDB table | `number` | `1` | no |
| <a name="input_kcl_write_max_capacity"></a> [kcl\_write\_max\_capacity](#input\_kcl\_write\_max\_capacity) | The maximum WRITE capacity for the KCL DynamoDB table | `number` | `10` | no |
| <a name="input_kcl_write_min_capacity"></a> [kcl\_write\_min\_capacity](#input\_kcl\_write\_min\_capacity) | The minimum WRITE capacity for the KCL DynamoDB table | `number` | `1` | no |
| <a name="input_schemas_json"></a> [schemas\_json](#input\_schemas\_json) | List of schemas to get shredded as JSON | `list(string)` | `[]` | no |
| <a name="input_schemas_skip"></a> [schemas\_skip](#input\_schemas\_skip) | List of schemas to not get shredded (and thus not loaded) | `list(string)` | `[]` | no |
| <a name="input_schemas_tsv"></a> [schemas\_tsv](#input\_schemas\_tsv) | List of schemas to get shredded as TSV | `list(string)` | `[]` | no |
| <a name="input_sns_topic_arn"></a> [sns\_topic\_arn](#input\_sns\_topic\_arn) | The ARN of the SNS topic that Transformer will send the transforming complete message. Either `sqs_queue_name` or `sns_topic_arn` needs to be set | `string` | `""` | no |
| <a name="input_sqs_queue_name"></a> [sqs\_queue\_name](#input\_sqs\_queue\_name) | The name of the SQS queue that Transformer will send the transforming complete message. Either `sqs_queue_name` or `sns_topic_arn` needs to be set | `string` | `""` | no |
| <a name="input_ssh_ip_allowlist"></a> [ssh\_ip\_allowlist](#input\_ssh\_ip\_allowlist) | The list of CIDR ranges to allow SSH traffic from | `list(any)` | <pre>[<br> "0.0.0.0/0"<br>]</pre> | no |
| <a name="input_tags"></a> [tags](#input\_tags) | The tags to append to this resource | `map(string)` | `{}` | no |
| <a name="input_telemetry_enabled"></a> [telemetry\_enabled](#input\_telemetry\_enabled) | Whether or not to send telemetry information back to Snowplow Analytics Ltd | `bool` | `true` | no |
| <a name="input_transformation_type"></a> [transformation\_type](#input\_transformation\_type) | Type of the transformation (shred or widerow) | `string` | `"shred"` | no |
| <a name="input_transformer_compression"></a> [transformer\_compression](#input\_transformer\_compression) | Transformer output compression, GZIP or NONE | `string` | `"GZIP"` | no |
| <a name="input_user_provided_id"></a> [user\_provided\_id](#input\_user\_provided\_id) | An optional unique identifier to identify the telemetry events emitted by this stack | `string` | `""` | no |
| <a name="input_widerow_file_format"></a> [widerow\_file\_format](#input\_widerow\_file\_format) | The output file\_format from the widerow transformation\_type selected (json or parquet) | `string` | `"json"` | no |

## Outputs

Expand Down
39 changes: 5 additions & 34 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@ locals {
module_name = "transformer-kinesis-ec2"
module_version = "0.3.4"

app_name = "transformer-kinesis"
app_version = "5.6.0"
app_name = "transformer-kinesis"

local_tags = {
Name = var.name
app_name = local.app_name
app_version = local.app_version
app_version = var.app_version
module_name = local.module_name
module_version = local.module_version
}
Expand Down Expand Up @@ -68,7 +67,7 @@ module "telemetry" {
cloud = "AWS"
region = data.aws_region.current.name
app_name = local.app_name
app_version = local.app_version
app_version = var.app_version
module_name = local.module_name
module_version = local.module_version
}
Expand Down Expand Up @@ -336,38 +335,10 @@ locals {

iglu_resolver = templatefile("${path.module}/templates/iglu_resolver.json.tmpl", { resolvers = jsonencode(local.resolvers) })

config = templatefile("${path.module}/templates/config.json.tmpl", {
app_name = var.name
stream_name = var.stream_name
region = data.aws_region.current.name
initial_position = var.initial_position
transformed_output = local.s3_path
compression = var.transformer_compression
window_period = "${var.window_period_min} minutes"
sqs_enabled = local.sqs_enabled
sqs_queue_name = var.sqs_queue_name
sns_topic_arn = var.sns_topic_arn
transformation_type = var.transformation_type
default_shred_format = var.default_shred_format
schemas_json = jsonencode(var.schemas_json)
schemas_tsv = jsonencode(var.schemas_tsv)
schemas_skip = jsonencode(var.schemas_skip)
widerow_file_format = var.widerow_file_format

telemetry_disable = !var.telemetry_enabled
telemetry_collector_uri = join("", module.telemetry.*.collector_uri)
telemetry_collector_port = 443
telemetry_secure = true
telemetry_user_provided_id = var.user_provided_id
telemetry_auto_gen_id = join("", module.telemetry.*.auto_generated_id)
telemetry_module_name = local.module_name
telemetry_module_version = local.module_version
})

user_data = templatefile("${path.module}/templates/user-data.sh.tmpl", {
config_b64 = base64encode(local.config)
config_b64 = var.config_b64
iglu_resolver_b64 = base64encode(local.iglu_resolver)
version = local.app_version
version = var.app_version

telemetry_script = join("", module.telemetry.*.amazon_linux_2_user_data)

Expand Down
58 changes: 8 additions & 50 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ variable "name" {
type = string
}

# Version of the transformer application to deploy.
# Optional: falls back to "5.8.0" when the caller does not set it.
# main.tf reads this value for the `app_version` tag, the telemetry module's
# `app_version` argument and the `version` parameter of the user-data template.
variable "app_version" {
description = "Version of the transformer"
type = string
default = "5.8.0"
}

variable "vpc_id" {
description = "The VPC to deploy Transformer within"
type = string
Expand Down Expand Up @@ -103,9 +109,8 @@ variable "stream_name" {
type = string
}

variable "initial_position" {
description = "Where to start processing the input Kinesis Stream from (TRIM_HORIZON or LATEST)"
default = "TRIM_HORIZON"
# Application configuration supplied by the caller, already base64-encoded.
# Required: there is no default.
# main.tf passes the value as-is to the user-data template's `config_b64`
# parameter; the module does not encode it again.
variable "config_b64" {
description = "Base64 encoded config"
type = string
}

Expand All @@ -119,17 +124,6 @@ variable "s3_bucket_object_prefix" {
type = string
}

variable "transformer_compression" {
description = "Transformer output compression, GZIP or NONE"
default = "GZIP"
type = string
}

variable "window_period_min" {
description = "Frequency to emit loading finished message - 5,10,15,20,30,60 etc minutes"
type = number
}

variable "sqs_queue_name" {
description = "The name of the SQS queue that Transformer will send the transforming complete message. Either `sqs_queue_name` or `sns_topic_arn` needs to be set"
default = ""
Expand All @@ -142,42 +136,6 @@ variable "sns_topic_arn" {
type = string
}

variable "transformation_type" {
description = "Type of the transformation (shred or widerow)"
default = "shred"
type = string
}

variable "default_shred_format" {
description = "Format used by default when format type is 'shred' (TSV or JSON)"
default = "TSV"
type = string
}

variable "schemas_json" {
description = "List of schemas to get shredded as JSON"
default = []
type = list(string)
}

variable "schemas_tsv" {
description = "List of schemas to get shredded as TSV"
default = []
type = list(string)
}

variable "schemas_skip" {
description = "List of schemas to not get shredded (and thus not loaded)"
default = []
type = list(string)
}

variable "widerow_file_format" {
description = "The output file_format from the widerow transformation_type selected (json or parquet)"
default = "json"
type = string
}

# --- Iglu Resolver

variable "default_iglu_resolvers" {
Expand Down

0 comments on commit 732ea5a

Please sign in to comment.