Skip to content

Commit

Permalink
feat: add support for suppressing notifications (#3)
Browse files Browse the repository at this point in the history
Users can specify MSK cluster states that should not trigger a notification.
- Updated pre-commit hooks to use newer versions:
  - antonbabenko/pre-commit-terraform@v1.83.5
  - pre-commit/pre-commit-hooks@v4.5.0
  • Loading branch information
stefanfreitag authored Oct 18, 2023
1 parent 5026a20 commit 45af487
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 37 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,6 @@ terraform.rc

# Python virtual environment
.venv

# Lambda zip directory
out/
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/antonbabenko/pre-commit-terraform
rev: v1.83.2
rev: v1.83.5
hooks:
- id: terraform_fmt
- id: terraform_validate
Expand All @@ -27,7 +27,7 @@ repos:
- --args=--quiet
- --args=--skip-check CKV_AWS_116,CKV_AWS_117,CKV_AWS_173,CKV_AWS_272
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: check-merge-conflict
- id: end-of-file-fixer
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ No modules.
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_email"></a> [email](#input\_email) | List of e-mail addresses subscribing to the SNS topic. Default is empty list. | `list(string)` | `[]` | no |
| <a name="input_ignore_states"></a> [ignore\_states](#input\_ignore\_states) | Suppress warnings for the listed MSK states. Default: ['MAINTENANCE'] | `list(string)` | <pre>[<br> "MAINTENANCE"<br>]</pre> | no |
| <a name="input_log_retion_period_in_days"></a> [log\_retion\_period\_in\_days](#input\_log\_retion\_period\_in\_days) | Number of days logs will be retained. Default is 365 days. | `number` | `365` | no |
| <a name="input_schedule_expression"></a> [schedule\_expression](#input\_schedule\_expression) | The schedule expression for the CloudWatch event rule. Default is 'rate(15 minutes)'. | `string` | `"rate(15 minutes)"` | no |
| <a name="input_tags"></a> [tags](#input\_tags) | A map of tags to add to all resources. Default is empty map. | `map(string)` | `{}` | no |

Expand Down
2 changes: 1 addition & 1 deletion data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ data "aws_region" "current" {}
data "archive_file" "status_checker_code" {
type = "zip"
source_dir = "${path.module}/functions/check-msk-status/"
output_path = "${path.module}/python/hello-python.zip"
output_path = "${path.module}/out/check-msk-status.zip"
}
2 changes: 1 addition & 1 deletion examples/01_default_configuration/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ No inputs.
## Outputs

No outputs.
<!-- END_TF_DOCS -->
<!-- END_TF_DOCS -->
56 changes: 33 additions & 23 deletions functions/check-msk-status/index.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,47 @@
import boto3
import os


def lambda_handler(event, context):
    """Check the state of every MSK cluster and warn via SNS when needed.

    Each cluster returned by the MSK API is described individually. If its
    state is neither ``ACTIVE`` nor one of the states listed in the
    ``SUPPRESS_STATES`` environment variable, a warning is published to the
    SNS topic named by ``SNS_TOPIC_ARN``.

    Environment variables:
        SNS_TOPIC_ARN: ARN of the SNS topic that receives warnings (required).
        SUPPRESS_STATES: Comma-separated MSK states that must not trigger a
            notification. May be unset or empty, in which case only
            ``ACTIVE`` is treated as not needing a notification.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).

    Returns:
        dict: ``{"statusCode": 200, "body": "OK"}`` once all clusters have
        been inspected.

    Raises:
        KeyError: If ``SNS_TOPIC_ARN`` is not set.
    """
    topic_arn = os.environ["SNS_TOPIC_ARN"]

    # Tolerate an unset/empty variable and stray whitespace or trailing
    # commas, so that "" does not turn into a bogus [""] state entry.
    raw_states = os.environ.get("SUPPRESS_STATES", "")
    suppressed_states = [
        state.strip() for state in raw_states.split(",") if state.strip()
    ]

    # NOTE(review): the region is hard-coded; clusters in other regions are
    # not inspected by this function.
    region = "eu-central-1"
    client = boto3.client("kafka", region_name=region)
    # Create the SNS client once instead of once per cluster.
    sns_client = boto3.client("sns")

    valid_states = ["ACTIVE"] + suppressed_states
    print(
        "Notifications suppressed for these MSK states: {}".format(
            ", ".join(valid_states)
        )
    )

    # list_clusters is a paginated API; walk every page so that clusters
    # beyond the first page are checked as well.
    paginator = client.get_paginator("list_clusters")
    for page in paginator.paginate():
        for cluster in page["ClusterInfoList"]:
            arn = cluster["ClusterArn"]
            response = client.describe_cluster(ClusterArn=arn)
            status = response["ClusterInfo"]["State"]
            print("The cluster is in state {}.".format(status))

            if status not in valid_states:
                print("The MSK cluster: {} needs attention.".format(arn))
                sns_client.publish(
                    TopicArn=topic_arn,
                    Message="MSK cluster: "
                    + arn
                    + " needs attention. The status is: "
                    + status,
                    Subject="MSK Health Warning!",
                )
            else:
                print(
                    "The MSK cluster: {} is in a healthy state, and is reachable and available for use.".format(
                        arn
                    )
                )

    return {"statusCode": 200, "body": "OK"}
20 changes: 10 additions & 10 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ resource "aws_sns_topic_subscription" "msk_health_sns_topic_email_target" {

# IAM role
resource "aws_iam_role" "msk_health_lambda_role" {
name = "msk-health-lambda-role-${random_id.id.hex}"

name = "msk-health-lambda-role-${random_id.id.hex}"
assume_role_policy = <<EOF
{
"Version": "2012-10-17",
Expand Down Expand Up @@ -55,8 +54,7 @@ resource "aws_iam_policy" "msk_health_lambda_role_policy" {
name = "msk-health-lambda-role-policy-${random_id.id.hex}"
path = "/"
description = "IAM policy msk health solution lambda"

policy = <<EOF
policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
Expand Down Expand Up @@ -87,12 +85,11 @@ resource "aws_iam_policy" "msk_health_lambda_role_policy" {
]
}
EOF
tags = var.tags
tags = var.tags
}


resource "aws_lambda_function" "msk_health_lambda" {
filename = "${path.module}/python/hello-python.zip"
filename = data.archive_file.status_checker_code.output_path
function_name = "msk_status_monitor-${random_id.id.hex}"
description = "MSK Status Monitor"
role = aws_iam_role.msk_health_lambda_role.arn
Expand All @@ -102,10 +99,13 @@ resource "aws_lambda_function" "msk_health_lambda" {
memory_size = 128
timeout = 60
tags = var.tags

tracing_config {
mode = "Active"
}
environment {
variables = {
SNS_TOPIC_ARN = aws_sns_topic.msk_health_sns_topic.arn
SNS_TOPIC_ARN = aws_sns_topic.msk_health_sns_topic.arn
SUPPRESS_STATES = join(",", var.ignore_states)
}
}

Expand Down Expand Up @@ -137,6 +137,6 @@ resource "aws_lambda_permission" "allow_cw_call_lambda" {
# Log group for the Lambda function
resource "aws_cloudwatch_log_group" "msk_health_lambda_log_groups" {
name = "/aws/lambda/msk_status_monitor-${random_id.id.hex}"
retention_in_days = 30
retention_in_days = var.log_retion_period_in_days
tags = var.tags
}
21 changes: 21 additions & 0 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,32 @@ variable "email" {
default = []
}

# MSK cluster states that must not raise a notification.
# The list is joined with "," and handed to the Lambda function as the
# SUPPRESS_STATES environment variable; the function always treats "ACTIVE"
# as not needing a notification in addition to the states listed here.
variable "ignore_states" {
description = "Suppress warnings for the listed MSK states. Default: ['MAINTENANCE']"
type = list(string)
default = [
"MAINTENANCE"
]
}

# Retention period (in days) applied to the Lambda function's CloudWatch
# log group. The accepted values are the ones enumerated in the validation
# below; the list and the error message must stay in sync.
variable "log_retion_period_in_days" {
  type        = number
  default     = 365
  description = "Number of days logs will be retained. Default is 365 days."

  validation {
    # 2922 (8 years) was previously mistyped as 2992 in this list, which
    # rejected the valid value and let an invalid one through validation.
    condition = contains([1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365,
    400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653], var.log_retion_period_in_days)
    error_message = "log_retion_period_in_days must be one of the allowed values: 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653"
  }
}

variable "schedule_expression" {
description = "The schedule expression for the CloudWatch event rule. Default is 'rate(15 minutes)'."
type = string
default = "rate(15 minutes)"
}

variable "tags" {
description = "A map of tags to add to all resources. Default is empty map."
type = map(string)
Expand Down

0 comments on commit 45af487

Please sign in to comment.