diff --git a/.gitignore b/.gitignore index 7a3e2fd..694d85f 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,7 @@ crash.log # .tfvars files are managed as part of configuration and so should be included in # version control. # -# example.tfvars +# *.tfvars* # Ignore override files as they are usually used to override resources locally and so # are not checked in diff --git a/README.md b/README.md index 86a338c..7caebf5 100644 --- a/README.md +++ b/README.md @@ -1,100 +1,55 @@ # New Relic Terraform for Multiple Accounts -> Prototype which adopts a flat, modular approach to generating resources at scale for multiple accounts using New Relic's Terraform provider. +> Prototype which adopts a flat, modular approach to generating resources at scale for multiple accounts using New Relic's Terraform provider without any dependencies. - [Prerequisites](#prerequisites) - [Usage](#usage) -- [Installation](#installation) - - [Modules](#modules) - - [Environments](#environments) +- [Configuration](#configuration) - [Support](#support) -- [Roadmap](#roadmap) +- [Issues](#issues) - [Contributing](#contributing) - [License](#license) - [Credits](#credits) ## Prerequisites -- [Terraform](https://www.terraform.io/downloads.html) v0.14.6 -- [New Relic Provider](https://registry.terraform.io/providers/newrelic/newrelic/latest/docs) v2.18.0 +- [Terraform](https://www.terraform.io/downloads.html) ~> 1.0 +- [New Relic Provider](https://registry.terraform.io/providers/newrelic/newrelic/latest/docs) ~> 2.0 ## Usage -1. Once cloned, goto `./environments/` and run `terraform init` to initialize Terraform configuration files. -1. Configure: - 1. Notification channel details.
- Found within [`./modules/channels/main.tf`](modules/channels/main.tf). - 1. Provider details for each account, such as: `account_id` and `api_key`.
- Found within [`./environments/alpha.tf`](environments/alpha.tf) and [`./environments/bravo-x.tf`](environments/bravo-x.tf). -1. When ready, run `terraform apply` to preview changes before applying. - -Observe that notifications channels, alert policies and conditions are generated and linked together for each account. Additionally, `bravo-x` account features a second alert policy with a different set of conditions, while reusing the same notification channels. - -
View tabulated list of alert conditions… - -| environment | signal | type | threshold | occurrences | nrql | -| ----------- | --------------- | -------- | --------: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| alpha | cpu utilisation | baseline | 3 | all | `FROM Metric SELECT average(apm.service.cpu.usertime.utilization) FACET appName WHERE appName NOT LIKE 'P%X %'` | -| alpha | cpu utilisation | static | 50 | all | `FROM Metric SELECT average(apm.service.cpu.usertime.utilization) FACET appName WHERE appName NOT LIKE 'P%X %'` | -| alpha | error rate | baseline | 3 | all | `FROM Metric SELECT count(apm.service.transaction.error.count) / count(apm.service.transaction.duration) \* 100 FACET appName WHERE appName NOT LIKE 'P%X %'` | -| alpha | error rate | static | 1 | all | `FROM Metric SELECT count(apm.service.transaction.error.count) / count(apm.service.transaction.duration) \* 100 FACET appName WHERE appName NOT LIKE 'P%X %'` | -| alpha | memory usage | baseline | 3 | all | `FROM Metric SELECT average(apm.service.memory.physical) / 1000 FACET appName WHERE appName NOT LIKE 'P%X %'` | -| alpha | response time | baseline | 3 | all | `FROM Metric SELECT average(apm.service.transaction.duration) FACET appName WHERE appName NOT LIKE 'P%X %'` | -| alpha | response time | static | 3 | all | `FROM Metric SELECT average(apm.service.transaction.duration) FACET appName WHERE appName NOT LIKE 'P%X %'` | -| alpha | throughput | baseline | 3 | all | `FROM Metric SELECT rate(count(apm.service.transaction.duration),1 minute) FACET appName WHERE appName NOT LIKE 'P%X %'` | -| alpha | throughput | static | 999999 | at_least_once | `FROM Metric SELECT count(apm.service.transaction.duration) FACET appName WHERE appName NOT LIKE 'P%X %'` | -| bravo | cpu utilisation | baseline | 3 | all | `FROM Metric SELECT 
average(apm.service.cpu.usertime.utilization) FACET appName WHERE appName NOT LIKE 'P%X %'` | -| bravo | cpu utilisation | static | 50 | all | `FROM Metric SELECT average(apm.service.cpu.usertime.utilization) FACET appName WHERE appName NOT LIKE 'P%X %'` | -| bravo | error rate | baseline | 3 | all | `FROM Metric SELECT count(apm.service.transaction.error.count) / count(apm.service.transaction.duration) \* 100 FACET appName WHERE appName NOT LIKE 'P%X %'` | -| bravo | error rate | static | 1 | all | `FROM Metric SELECT count(apm.service.transaction.error.count) / count(apm.service.transaction.duration) \* 100 FACET appName WHERE appName NOT LIKE 'P%X %'` | -| bravo | memory usage | baseline | 3 | all | `FROM Metric SELECT average(apm.service.memory.physical) / 1000 FACET appName WHERE appName NOT LIKE 'P%X %'` | -| bravo | response time | baseline | 3 | all | `FROM Metric SELECT average(apm.service.transaction.duration) FACET appName WHERE appName NOT LIKE 'P%X %'` | -| bravo | response time | static | 3 | all | `FROM Metric SELECT average(apm.service.transaction.duration) FACET appName WHERE appName NOT LIKE 'P%X %'` | -| bravo | throughput | baseline | 3 | all | `FROM Metric SELECT rate(count(apm.service.transaction.duration),1 minute) FACET appName WHERE appName NOT LIKE 'P%X %'` | -| bravo | throughput | static | 999999 | at_least_once | `FROM Metric SELECT count(apm.service.transaction.duration) FACET appName WHERE appName NOT LIKE 'P%X %'` | -| bravox | cpu utilisation | baseline | 3 | all | `FROM Metric SELECT average(apm.service.cpu.usertime.utilization) FACET appName WHERE appName LIKE 'P%X %'` | -| bravox | cpu utilisation | static | 50 | all | `FROM Metric SELECT average(apm.service.cpu.usertime.utilization) FACET appName WHERE appName LIKE 'P%X %'` | -| bravox | error rate | baseline | 3 | all | `FROM Metric SELECT count(apm.service.transaction.error.count) / count(apm.service.transaction.duration) \* 100 FACET appName WHERE appName LIKE 'P%X %'` | -| 
bravox | error rate | static | 1 | all | `FROM Metric SELECT count(apm.service.transaction.error.count) / count(apm.service.transaction.duration) \* 100 FACET appName WHERE appName LIKE 'P%X %'` | -| bravox | memory usage | baseline | 3 | all | `FROM Metric SELECT average(apm.service.memory.physical) / 1000 FACET appName WHERE appName LIKE 'P%X %'` | -| bravox | response time | baseline | 3 | all | `FROM Metric SELECT average(apm.service.transaction.duration) FACET appName WHERE appName LIKE 'P%X %'` | -| bravox | response time | static | 3 | all | `FROM Metric SELECT average(apm.service.transaction.duration) FACET appName WHERE appName LIKE 'P%X %'` | -| bravox | throughput | baseline | 3 | all | `FROM Metric SELECT rate(count(apm.service.transaction.duration),1 minute) FACET appName WHERE appName LIKE 'P%X %'` | -| bravox | throughput | static | 999999 | at_least_once | `FROM Metric SELECT count(apm.service.transaction.duration) FACET appName WHERE appName LIKE 'P%X %'` | - -
- -## Installation - -### Modules +1. Clone the repository. +1. Access [terraform.tfvars.json](terraform.tfvars.json). +1. Replace `account_id`, `api_key`, and (optionally) `alias` with your accounts' data. +1. Initialise Terraform with latest provider: `terraform init -upgrade`. +1. Apply configuration with placeholder data: `terraform apply -auto-approve -compact-warnings`. +## Configuration + +- For a flat structure, shared configurations are prefixed `shared--` at the root of the directory. + - This removes the need for external wrappers, such as [Terragrunt](https://terragrunt.gruntwork.io/). + - Each account requires an alias in order to pass its associated `account_id` into referenced modules via `providers`. - Each module is located in its own subdirectory to: - Encourage reusability: keeping code [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself). - Prevent conflict of duplicate entity names. -- They're paired with a generic [`versions.tf`](modules/channels/versions.tf) which inherits specific version details from [`./environments/versions.tf`](environments/versions.tf): remaining flexible for upgrades. - -| Module | Description | Input | Output | -| ------------------------------------ | --------------------------------------------------- | ------------------------------------------------------------------- | ------------------------------------- | -| [channels](modules/channels/main.tf) | Alert notification channels. | None. | List of `newrelic_alert_channel` IDs. | -| [alerts](modules/alerts/main.tf) | Alert policy with associated NRQL alert conditions. | Name of alert policy; list of alert channel IDs; selection of apps. | None. | +- Modules are paired with a generic [`versions.tf`](modules/channel/versions.tf) which inherits version details from the root to remain flexible for upgrades. -### Environments - -- Due to the flat directory structure, the `terraform apply` command addresses available accounts altogether. 
- - This removes the need for external wrappers, such as [Terragrunt](https://terragrunt.gruntwork.io/). - - At the cost of needing each module reference to have a unique name. -- Each account requires an alias in order to pass its associated `account_id` into referenced modules via `providers`. +| Module | Description | Input | Output | +| -------------------------------------- | ----------------------------------------------------------------- | ------------------------------------------------------- | -------------- | +| [apm_alert](modules/apm_alert/main.tf) | APM alert policies and associated NRQL conditions. | Account alias, runbook URL and channel IDs. | None. | +| [channel](modules/channel/main.tf) | Alert notification channels. | None. | Channel IDs. | +| [dashboard](modules/dashboard/main.tf) | Dashboard. | Account alias. | Dashboard URL. | +| [synthetic](modules/synthetic/main.tf) | Synthetic monitors and associated multilocation alert conditions. | Account alias, runbook URL, channel IDs, and endpoints. | None. | ## Support - For general queries about the developer toolkit, [New Relic Explorers Hub](https://discuss.newrelic.com/c/build-on-new-relic/developer-toolkit/) is a good starting point. - For specific queries about New Relic's Terraform provider, [newrelic/terraform-provider-newrelic](https://github.com/newrelic/terraform-provider-newrelic/issues) is your best bet. -## Roadmap +## Issues -1. Override NRQL alert conditions on a per-account basis. -1. Generate NRQL alert conditions by looping through variable map arrays. -1. Create a dashboard module to display [Golden Signals](https://sre.google/sre-book/monitoring-distributed-systems/#xref_monitoring_golden-signals). +- Synthetic multilocation alert condition is changed on every run. + - Workaround: Add `lifecycle { ignore_changes = [entities] }` to the resource to ignore changes. 
## Contributing diff --git a/environments/alpha.tf b/environments/alpha.tf deleted file mode 100644 index 618d4fb..0000000 --- a/environments/alpha.tf +++ /dev/null @@ -1,18 +0,0 @@ -provider "newrelic" { - alias = "alpha" - account_id = "OBFUSCATED" - api_key = "OBFUSCATED" -} - -module "alpha__channels" { - source = "../modules/channels" - providers = { newrelic = newrelic.alpha } -} - -module "alpha__alerts" { - source = "../modules/alerts" - providers = { newrelic = newrelic.alpha } - var__policy_name = "${upper("alpha")} app" - var__channel_ids = module.alpha__channels.out__channel_ids - var__nrql_filter = "appName NOT LIKE 'P%X %'" -} diff --git a/environments/bravo-x.tf b/environments/bravo-x.tf deleted file mode 100644 index 2019734..0000000 --- a/environments/bravo-x.tf +++ /dev/null @@ -1,26 +0,0 @@ -provider "newrelic" { - alias = "bravo" - account_id = "OBFUSCATED" - api_key = "OBFUSCATED" -} - -module "bravo__channels" { - source = "../modules/channels" - providers = { newrelic = newrelic.bravo } -} - -module "bravo__alerts" { - source = "../modules/alerts" - providers = { newrelic = newrelic.bravo } - var__policy_name = "${upper("bravo")} app" - var__channel_ids = module.bravo__channels.out__channel_ids - var__nrql_filter = "appName NOT LIKE 'P%X %'" -} - -module "bravox__alerts" { - source = "../modules/alerts" - providers = { newrelic = newrelic.bravo } - var__policy_name = "${upper("bravo")}X app" - var__channel_ids = module.bravo__channels.out__channel_ids - var__nrql_filter = "appName LIKE 'P%X %'" -} diff --git a/modules/alerts/main.tf b/modules/alerts/main.tf deleted file mode 100644 index 4a550ec..0000000 --- a/modules/alerts/main.tf +++ /dev/null @@ -1,241 +0,0 @@ -locals { - nrql__response_time = "FROM Metric SELECT average(apm.service.transaction.duration)" - nrql__throughput = "FROM Metric SELECT rate(count(apm.service.transaction.duration), 1 minute)" - nrql__error_rate = "FROM Metric SELECT count(apm.service.transaction.error.count) / 
count(apm.service.transaction.duration) * 100" - nrql__cpu_utilisation = "FROM Metric SELECT average(apm.service.cpu.usertime.utilization)" - nrql__memory_usage = "FROM Metric SELECT average(apm.service.memory.physical) / 1000" -} - -variable "var__policy_name" { - type = string - description = "Name of alert policy" -} - -variable "var__channel_ids" { - type = list(any) - description = "List of alert channel IDs" -} - -variable "var__nrql_filter" { - type = string - description = "Selection of apps" -} - -resource "newrelic_alert_policy" "policy__alert" { - name = var.var__policy_name - incident_preference = "PER_CONDITION_AND_TARGET" - channel_ids = var.var__channel_ids -} - -resource "newrelic_nrql_alert_condition" "response_time__threshold" { - name = "${var.var__policy_name} response time exceeded threshold" - type = "static" - value_function = "single_value" - violation_time_limit_seconds = 36000 - policy_id = newrelic_alert_policy.policy__alert.id - nrql { - query = "${local.nrql__response_time} FACET appName WHERE ${var.var__nrql_filter}" - evaluation_offset = 3 - } - critical { - operator = "above" - threshold = 3 - threshold_occurrences = "ALL" - threshold_duration = 900 - } - warning { - operator = "above" - threshold = 3 - threshold_occurrences = "ALL" - threshold_duration = 720 - } -} - -resource "newrelic_nrql_alert_condition" "response_time__baseline" { - name = "${var.var__policy_name} response time above baseline" - type = "baseline" - baseline_direction = "upper_only" - violation_time_limit_seconds = 36000 - policy_id = newrelic_alert_policy.policy__alert.id - nrql { - query = "${local.nrql__response_time} FACET appName WHERE ${var.var__nrql_filter}" - evaluation_offset = 3 - } - critical { - operator = "above" - threshold = 3 - threshold_occurrences = "ALL" - threshold_duration = 900 - } - warning { - operator = "above" - threshold = 3 - threshold_occurrences = "ALL" - threshold_duration = 720 - } -} - -resource "newrelic_nrql_alert_condition" 
"throughput__baseline" { - name = "${var.var__policy_name} throughput above baseline" - type = "baseline" - baseline_direction = "upper_only" - violation_time_limit_seconds = 36000 - policy_id = newrelic_alert_policy.policy__alert.id - nrql { - query = "${local.nrql__throughput} FACET appName WHERE ${var.var__nrql_filter}" - evaluation_offset = 3 - } - critical { - operator = "above" - threshold = 3 - threshold_occurrences = "ALL" - threshold_duration = 900 - } - warning { - operator = "above" - threshold = 3 - threshold_occurrences = "ALL" - threshold_duration = 720 - } -} - -resource "newrelic_nrql_alert_condition" "throughput__signal_lost" { - name = "${var.var__policy_name} throughput signal lost" - type = "static" - value_function = "single_value" - violation_time_limit_seconds = 36000 - policy_id = newrelic_alert_policy.policy__alert.id - nrql { - query = "${local.nrql__throughput} FACET appName WHERE ${var.var__nrql_filter}" - evaluation_offset = 3 - } - critical { - operator = "above" - threshold = 999999 - threshold_occurrences = "AT_LEAST_ONCE" - threshold_duration = 900 - } - expiration_duration = 14400 - open_violation_on_expiration = true - close_violations_on_expiration = false -} - -resource "newrelic_nrql_alert_condition" "error_rate__threshold" { - name = "${var.var__policy_name} error rate exceeded threshold" - type = "static" - value_function = "single_value" - violation_time_limit_seconds = 36000 - policy_id = newrelic_alert_policy.policy__alert.id - nrql { - query = "${local.nrql__error_rate} FACET appName WHERE ${var.var__nrql_filter}" - evaluation_offset = 3 - } - critical { - operator = "above" - threshold = 1 - threshold_occurrences = "ALL" - threshold_duration = 900 - } - warning { - operator = "above" - threshold = 1 - threshold_occurrences = "ALL" - threshold_duration = 720 - } -} - -resource "newrelic_nrql_alert_condition" "error_rate__baseline" { - name = "${var.var__policy_name} error rate above baseline" - type = "baseline" - 
baseline_direction = "upper_only" - violation_time_limit_seconds = 36000 - policy_id = newrelic_alert_policy.policy__alert.id - nrql { - query = "${local.nrql__error_rate} FACET appName WHERE ${var.var__nrql_filter}" - evaluation_offset = 3 - } - critical { - operator = "above" - threshold = 3 - threshold_occurrences = "ALL" - threshold_duration = 900 - } - warning { - operator = "above" - threshold = 3 - threshold_occurrences = "ALL" - threshold_duration = 720 - } -} - -resource "newrelic_nrql_alert_condition" "cpu_utilisation__threshold" { - name = "${var.var__policy_name} cpu utilisation exceeded threshold" - type = "static" - value_function = "single_value" - violation_time_limit_seconds = 36000 - policy_id = newrelic_alert_policy.policy__alert.id - nrql { - query = "${local.nrql__cpu_utilisation} FACET appName WHERE ${var.var__nrql_filter}" - evaluation_offset = 3 - } - critical { - operator = "above" - threshold = 70 - threshold_occurrences = "ALL" - threshold_duration = 900 - } - warning { - operator = "above" - threshold = 70 - threshold_occurrences = "ALL" - threshold_duration = 720 - } -} - -resource "newrelic_nrql_alert_condition" "cpu_utilisation__baseline" { - name = "${var.var__policy_name} cpu utilisation above baseline" - type = "baseline" - baseline_direction = "upper_only" - violation_time_limit_seconds = 36000 - policy_id = newrelic_alert_policy.policy__alert.id - nrql { - query = "${local.nrql__cpu_utilisation} FACET appName WHERE ${var.var__nrql_filter}" - evaluation_offset = 3 - } - critical { - operator = "above" - threshold = 3 - threshold_occurrences = "ALL" - threshold_duration = 900 - } - warning { - operator = "above" - threshold = 3 - threshold_occurrences = "ALL" - threshold_duration = 720 - } -} - -resource "newrelic_nrql_alert_condition" "memory_usage__baseline" { - name = "${var.var__policy_name} memory usage above baseline" - type = "baseline" - baseline_direction = "upper_only" - violation_time_limit_seconds = 36000 - policy_id = 
newrelic_alert_policy.policy__alert.id - nrql { - query = "${local.nrql__memory_usage} FACET appName WHERE ${var.var__nrql_filter}" - evaluation_offset = 3 - } - critical { - operator = "above" - threshold = 3 - threshold_occurrences = "ALL" - threshold_duration = 900 - } - warning { - operator = "above" - threshold = 3 - threshold_occurrences = "ALL" - threshold_duration = 720 - } -} diff --git a/modules/apm_alert/main.tf b/modules/apm_alert/main.tf new file mode 100644 index 0000000..f787a78 --- /dev/null +++ b/modules/apm_alert/main.tf @@ -0,0 +1,43 @@ +resource "newrelic_alert_policy" "policy" { + name = "${var.var--alias} APM Alerts" + incident_preference = "PER_CONDITION_AND_TARGET" +} + +resource "newrelic_alert_policy_channel" "policy_channel" { + policy_id = newrelic_alert_policy.policy.id + channel_ids = var.var--channel_ids +} + +resource "newrelic_nrql_alert_condition" "condition" { + for_each = local.condition + policy_id = newrelic_alert_policy.policy.id + name = each.value.name + type = each.value.type + runbook_url = var.var--runbook_url + expiration_duration = 60 * 2 + violation_time_limit_seconds = 60 * 60 * 24 + slide_by = 30 + aggregation_delay = 60 * 2 + aggregation_window = 60 + aggregation_method = "event_flow" + fill_option = "static" + fill_value = 0 + open_violation_on_expiration = false + close_violations_on_expiration = true + enabled = true + nrql { + query = each.value.query + } + critical { + threshold = each.value.threshold + threshold_duration = each.value.duration * 60 + operator = "above" + threshold_occurrences = "all" + } + warning { + threshold = each.value.threshold + threshold_duration = floor(each.value.duration * 0.8) * 60 + operator = "above" + threshold_occurrences = "all" + } +} diff --git a/modules/apm_alert/variables.tf b/modules/apm_alert/variables.tf new file mode 100644 index 0000000..b2eda24 --- /dev/null +++ b/modules/apm_alert/variables.tf @@ -0,0 +1,53 @@ +locals { + condition = { + latency_app = { + name = "app 
exceeded latency threshold" + query = "${local.nrql.latency} FACET appName" + type = "static" + threshold = 4 + duration = 6 + } + latency_host = { + name = "host exceeded latency threshold" + query = "${local.nrql.latency} FACET host" + type = "static" + threshold = 6 + duration = 8 + } + + error_app = { + name = "app exceeded error threshold" + query = "${local.nrql.error} FACET appName" + type = "static" + threshold = 2 + duration = 6 + } + error_host = { + name = "host exceeded error threshold" + query = "${local.nrql.error} FACET host" + type = "static" + threshold = 4 + duration = 8 + } + } + + nrql = { + latency = "FROM Transaction SELECT average(duration) AS 'latency (sec)'" + error = "FROM Transaction SELECT percentage(count(*), WHERE error IS true) AS 'error (%)'" + } +} + +variable "var--alias" { + description = "Account alias" + type = string +} + +variable "var--runbook_url" { + description = "Runbook URL" + type = string +} + +variable "var--channel_ids" { + description = "Channel IDs" + type = list(number) +} diff --git a/modules/alerts/versions.tf b/modules/apm_alert/versions.tf similarity index 100% rename from modules/alerts/versions.tf rename to modules/apm_alert/versions.tf diff --git a/modules/channel/main.tf b/modules/channel/main.tf new file mode 100644 index 0000000..802a83d --- /dev/null +++ b/modules/channel/main.tf @@ -0,0 +1,11 @@ +resource "newrelic_alert_channel" "email" { + name = "Email: Obfuscated" + type = "email" + config { recipients = "email@obfuscated.com" } +} + +resource "newrelic_alert_channel" "pagerduty" { + name = "PagerDuty: Obfuscated" + type = "pagerduty" + config { service_key = "obfuscated" } +} diff --git a/modules/channel/outputs.tf b/modules/channel/outputs.tf new file mode 100644 index 0000000..3a79896 --- /dev/null +++ b/modules/channel/outputs.tf @@ -0,0 +1,7 @@ +output "out--channel_id" { + description = "Channel IDs" + value = { + email = newrelic_alert_channel.email.id, + pagerduty = 
newrelic_alert_channel.pagerduty.id, + } +} diff --git a/modules/channels/versions.tf b/modules/channel/versions.tf similarity index 100% rename from modules/channels/versions.tf rename to modules/channel/versions.tf diff --git a/modules/channels/main.tf b/modules/channels/main.tf deleted file mode 100644 index af16687..0000000 --- a/modules/channels/main.tf +++ /dev/null @@ -1,22 +0,0 @@ -resource "newrelic_alert_channel" "channel__email" { - name = "Email : OBFUSCATED" - type = "email" - config { - recipients = "OBFUSCATED" - } -} - -resource "newrelic_alert_channel" "channel__pd" { - name = "PD : OBFUSCATED" - type = "pagerduty" - config { - service_key = "OBFUSCATED" - } -} - -output "out__channel_ids" { - value = [ - newrelic_alert_channel.channel__email.id, - newrelic_alert_channel.channel__pd.id, - ] -} diff --git a/modules/dashboard/main.tf b/modules/dashboard/main.tf new file mode 100644 index 0000000..620f119 --- /dev/null +++ b/modules/dashboard/main.tf @@ -0,0 +1,123 @@ +locals { + latency = "FROM Transaction SELECT average(duration) AS 'latency (sec)'" + error = "FROM Transaction SELECT percentage(count(*), WHERE error IS true) AS 'error (%)'" +} + +resource "newrelic_one_dashboard" "dashboard" { + name = "${var.var--alias} Dashboard" + permissions = "public_read_write" + page { + name = "${var.var--alias} Dashboard" + widget_markdown { + title = "" + row = 1 + column = 1 + width = 4 + height = 6 + text = <<-EOT + # Entity level + + Breakout activity across the [Golden Signals](https://landing.google.com/sre/sre-book/chapters/monitoring-distributed-systems/#xref_monitoring_golden-signals) for SRE. + +   + ## Golden signals + + * **Latency:** Response time taken to service a request: seconds (sec). + * **Error:** Failure rate of requests with error codes: percentage (%). + +   + ## Time + + Along with the time picker in the top-right corner, you can click-hold-and-drag across any chart to zoom into that period of time. 
+ EOT + } + widget_line { + title = "⏱️ Latency by app (sec)" + row = 1 + column = 5 + width = 4 + height = 3 + nrql_query { + query = "${local.latency} FACET appName SINCE 1 hour AGO LIMIT MAX TIMESERIES AUTO" + } + } + widget_line { + title = "⏱️ Latency by host (sec)" + row = 1 + column = 9 + width = 4 + height = 3 + nrql_query { + query = "${local.latency} FACET host SINCE 1 hour AGO LIMIT MAX TIMESERIES AUTO" + } + } + widget_line { + title = "🚫 Error by app (%)" + row = 4 + column = 5 + width = 4 + height = 3 + nrql_query { + query = "${local.error} FACET appName SINCE 1 hour AGO LIMIT MAX TIMESERIES AUTO" + } + } + widget_line { + title = "🚫 Error by host (%)" + row = 4 + column = 9 + width = 4 + height = 3 + nrql_query { + query = "${local.error} FACET host SINCE 1 hour AGO LIMIT MAX TIMESERIES AUTO" + } + } + widget_markdown { + title = "" + row = 7 + column = 1 + width = 12 + height = 1 + text = "" + } + widget_markdown { + title = "" + row = 8 + column = 1 + width = 4 + height = 4 + text = <<-EOT + # Filter level + + Slice activity in any way that's required. For example: + + * **App:** Activity across any given app. + * **Host:** Activity across any given host. + +   + **TIP:** Combine filters to surface relational activity. 
+      EOT
+    }
+    widget_bar {
+      title  = "🔻 Filter by app"
+      row    = 8
+      column = 5
+      width  = 4
+      height = 4
+      nrql_query {
+        query = "${local.latency} FACET appName SINCE 1 hour AGO LIMIT MAX"
+      }
+      filter_current_dashboard = true
+    }
+    widget_bar {
+      title  = "🔻 Filter by host"
+      row    = 8
+      column = 9
+      width  = 4
+      height = 4
+      nrql_query {
+        query = "${local.latency} FACET host SINCE 1 hour AGO LIMIT MAX"
+      }
+      filter_current_dashboard = true
+    }
+  }
+}
diff --git a/modules/dashboard/outputs.tf b/modules/dashboard/outputs.tf
new file mode 100644
index 0000000..7fb9aac
--- /dev/null
+++ b/modules/dashboard/outputs.tf
@@ -0,0 +1,4 @@
+output "out--dashboard_url" {
+  description = "Dashboard URL"
+  value       = newrelic_one_dashboard.dashboard.permalink
+}
diff --git a/modules/dashboard/variables.tf b/modules/dashboard/variables.tf
new file mode 100644
index 0000000..1d087ee
--- /dev/null
+++ b/modules/dashboard/variables.tf
@@ -0,0 +1,4 @@
+variable "var--alias" {
+  description = "Account alias"
+  type        = string
+}
diff --git a/modules/dashboard/versions.tf b/modules/dashboard/versions.tf
new file mode 100644
index 0000000..31a8cc0
--- /dev/null
+++ b/modules/dashboard/versions.tf
@@ -0,0 +1,7 @@
+terraform {
+  required_providers {
+    newrelic = {
+      source = "newrelic/newrelic"
+    }
+  }
+}
diff --git a/modules/synthetic/main.tf b/modules/synthetic/main.tf
new file mode 100644
index 0000000..f39c036
--- /dev/null
+++ b/modules/synthetic/main.tf
@@ -0,0 +1,38 @@
+# Alert policy that groups this account's synthetic alert conditions.
+resource "newrelic_alert_policy" "policy" {
+  name                = "${var.var--alias} Synthetic Alerts"
+  incident_preference = "PER_CONDITION_AND_TARGET"
+}
+
+# Subscribe the supplied notification channels to the policy.
+resource "newrelic_alert_policy_channel" "policy_channel" {
+  policy_id   = newrelic_alert_policy.policy.id
+  channel_ids = var.var--channel_ids
+}
+
+# One SIMPLE monitor per endpoint: the map key is the hostname, the map value
+# is the list of locations the check runs from.
+resource "newrelic_synthetics_monitor" "monitor" {
+  for_each            = var.var--endpoints
+  name                = "${var.var--alias} ${each.key}"
+  type                = "SIMPLE"
+  status              = "ENABLED"
+  bypass_head_request = true
+  # Every uri below is https://, so validate the certificate: an expired or
+  # untrusted certificate should fail the check instead of passing silently.
+  verify_ssl = true
+  frequency  = 5
+  uri        = "https://${each.key}"
+  locations  = each.value
+}
+
+# A single multilocation condition covering every monitor created above.
+resource "newrelic_synthetics_multilocation_alert_condition" "condition" {
+  entities                     = [for v in newrelic_synthetics_monitor.monitor : v.id]
+  policy_id                    = newrelic_alert_policy.policy.id
+  name                         = "violated synthetic check"
+  runbook_url                  = var.var--runbook_url
+  violation_time_limit_seconds = 60 * 60 * 24
+  critical { threshold = 3 }
+  warning { threshold = 1 }
+}
diff --git a/modules/synthetic/variables.tf b/modules/synthetic/variables.tf
new file mode 100644
index 0000000..8285309
--- /dev/null
+++ b/modules/synthetic/variables.tf
@@ -0,0 +1,19 @@
+variable "var--alias" {
+  description = "Account alias"
+  type        = string
+}
+
+variable "var--runbook_url" {
+  description = "Runbook URL"
+  type        = string
+}
+
+variable "var--channel_ids" {
+  description = "Channel IDs"
+  type        = list(number)
+}
+
+variable "var--endpoints" {
+  description = "Endpoints"
+  type        = map(any)
+}
diff --git a/modules/synthetic/versions.tf b/modules/synthetic/versions.tf
new file mode 100644
index 0000000..31a8cc0
--- /dev/null
+++ b/modules/synthetic/versions.tf
@@ -0,0 +1,7 @@
+terraform {
+  required_providers {
+    newrelic = {
+      source = "newrelic/newrelic"
+    }
+  }
+}
diff --git a/providers.tf b/providers.tf
new file mode 100644
index 0000000..956437a
--- /dev/null
+++ b/providers.tf
@@ -0,0 +1,23 @@
+provider "newrelic" {
+  alias      = "amethyst"
+  account_id = var.account.amethyst.account_id
+  api_key    = var.account.amethyst.api_key
+}
+
+provider "newrelic" {
+  alias      = "burgundy"
+  account_id = var.account.burgundy.account_id
+  api_key    = var.account.burgundy.api_key
+}
+
+provider "newrelic" {
+  alias      = "cerulean"
+  account_id = var.account.cerulean.account_id
+  api_key    = var.account.cerulean.api_key
+}
+
+provider "newrelic" {
+  alias      = "dartmouth"
+  account_id = var.account.dartmouth.account_id
+  api_key    = var.account.dartmouth.api_key
+}
diff --git a/shared--apm_alert.tf b/shared--apm_alert.tf
new file mode 100644
index 0000000..d679d81
--- /dev/null
+++ b/shared--apm_alert.tf
@@ -0,0 +1,43 @@
+module "amethyst--alerts" {
+  source           = "./modules/apm_alert"
+  providers        = { newrelic = newrelic.amethyst }
+  var--alias       = var.account.amethyst.alias
+  var--runbook_url = "https://obfuscated.com"
+  var--channel_ids = [
+    module.amethyst--channel.out--channel_id.email,
+    module.amethyst--channel.out--channel_id.pagerduty,
+  ]
+}
+
+module "burgundy--alerts" {
+  source           = "./modules/apm_alert"
+  providers        = { newrelic = newrelic.burgundy }
+  var--alias       = var.account.burgundy.alias
+  var--runbook_url = "https://obfuscated.com"
+  var--channel_ids = [
+    module.burgundy--channel.out--channel_id.email,
+    module.burgundy--channel.out--channel_id.pagerduty,
+  ]
+}
+
+module "cerulean--alerts" {
+  source           = "./modules/apm_alert"
+  providers        = { newrelic = newrelic.cerulean }
+  var--alias       = var.account.cerulean.alias
+  var--runbook_url = "https://obfuscated.com"
+  var--channel_ids = [
+    module.cerulean--channel.out--channel_id.email,
+    module.cerulean--channel.out--channel_id.pagerduty,
+  ]
+}
+
+module "dartmouth--alerts" {
+  source           = "./modules/apm_alert"
+  providers        = { newrelic = newrelic.dartmouth }
+  var--alias       = var.account.dartmouth.alias
+  var--runbook_url = "https://obfuscated.com"
+  var--channel_ids = [
+    module.dartmouth--channel.out--channel_id.email,
+    module.dartmouth--channel.out--channel_id.pagerduty,
+  ]
+}
diff --git a/shared--channel.tf b/shared--channel.tf
new file mode 100644
index 0000000..8724313
--- /dev/null
+++ b/shared--channel.tf
@@ -0,0 +1,19 @@
+module "amethyst--channel" {
+  source    = "./modules/channel"
+  providers = { newrelic = newrelic.amethyst }
+}
+
+module "burgundy--channel" {
+  source    = "./modules/channel"
+  providers = { newrelic = newrelic.burgundy }
+}
+
+module "cerulean--channel" {
+  source    = "./modules/channel"
+  providers = { newrelic = newrelic.cerulean }
+}
+
+module "dartmouth--channel" {
+  source    = "./modules/channel"
+  providers = { newrelic = newrelic.dartmouth }
+}
diff --git a/shared--dashboard.tf b/shared--dashboard.tf
new file mode 100644
index 0000000..0428520
--- /dev/null
+++ b/shared--dashboard.tf
@@ -0,0 +1,23 @@
+module "amethyst--dashboard" {
+  source     = "./modules/dashboard"
+  providers  = { newrelic = newrelic.amethyst }
+  var--alias = upper(var.account.amethyst.alias) # upper-cased, same as the other three dashboard modules
+}
+
+module "burgundy--dashboard" {
+  source     = "./modules/dashboard"
+  providers  = { newrelic = newrelic.burgundy }
+  var--alias = upper(var.account.burgundy.alias)
+}
+
+module "cerulean--dashboard" {
+  source     = "./modules/dashboard"
+  providers  = { newrelic = newrelic.cerulean }
+  var--alias = upper(var.account.cerulean.alias)
+}
+
+module "dartmouth--dashboard" {
+  source     = "./modules/dashboard"
+  providers  = { newrelic = newrelic.dartmouth }
+  var--alias = upper(var.account.dartmouth.alias)
+}
diff --git a/shared--synthetic.tf b/shared--synthetic.tf
new file mode 100644
index 0000000..806b37d
--- /dev/null
+++ b/shared--synthetic.tf
@@ -0,0 +1,63 @@
+module "amethyst--synthetic" {
+  source           = "./modules/synthetic"
+  providers        = { newrelic = newrelic.amethyst }
+  var--alias       = var.account.amethyst.alias
+  var--runbook_url = "https://obfuscated.com"
+  var--channel_ids = [
+    module.amethyst--channel.out--channel_id.email,
+    module.amethyst--channel.out--channel_id.pagerduty,
+  ]
+  var--endpoints = { # hostname => locations list taken from var.location
+    "google.com"   = var.location.amer
+    "youtube.com"  = var.location.emea
+    "facebook.com" = var.location.apac
+  }
+}
+
+module "burgundy--synthetic" {
+  source           = "./modules/synthetic"
+  providers        = { newrelic = newrelic.burgundy }
+  var--alias       = var.account.burgundy.alias
+  var--runbook_url = "https://obfuscated.com"
+  var--channel_ids = [
+    module.burgundy--channel.out--channel_id.email,
+    module.burgundy--channel.out--channel_id.pagerduty,
+  ]
+  var--endpoints = { # hostname => locations list taken from var.location
+    "twitter.com"   = var.location.amer
+    "instagram.com" = var.location.emea
+    "yahoo.com"     = var.location.apac
+  }
+}
+
+module "cerulean--synthetic" {
+  source           = "./modules/synthetic"
+  providers        = { newrelic = newrelic.cerulean }
+  var--alias       = var.account.cerulean.alias
+  var--runbook_url = "https://obfuscated.com"
+  var--channel_ids = [
+    module.cerulean--channel.out--channel_id.email,
+    module.cerulean--channel.out--channel_id.pagerduty,
+  ]
+  var--endpoints = { # hostname => locations list taken from var.location
+    "whatsapp.com" = var.location.amer
+    "amazon.com"   = var.location.emea
+    "netflix.com"  = var.location.apac
+  }
+}
+
+module "dartmouth--synthetic" {
+  source           = "./modules/synthetic"
+  providers        = { newrelic = newrelic.dartmouth }
+  var--alias       = var.account.dartmouth.alias
+  var--runbook_url = "https://obfuscated.com"
+  var--channel_ids = [
+    module.dartmouth--channel.out--channel_id.email,
+    module.dartmouth--channel.out--channel_id.pagerduty,
+  ]
+  var--endpoints = { # hostname => locations list taken from var.location
+    "office.com"   = var.location.amer
+    "reddit.com"   = var.location.emea
+    "linkedin.com" = var.location.apac
+  }
+}
diff --git a/terraform.tfvars.json b/terraform.tfvars.json
new file mode 100644
index 0000000..817080a
--- /dev/null
+++ b/terraform.tfvars.json
@@ -0,0 +1,50 @@
+{
+  "account": {
+    "amethyst": {
+      "alias": "Amethyst",
+      "account_id": 123456,
+      "api_key": "OBFUSCATED"
+    },
+    "burgundy": {
+      "alias": "Burgundy",
+      "account_id": 123456,
+      "api_key": "OBFUSCATED"
+    },
+    "cerulean": {
+      "alias": "Cerulean",
+      "account_id": 123456,
+      "api_key": "OBFUSCATED"
+    },
+    "dartmouth": {
+      "alias": "Dartmouth",
+      "account_id": 123456,
+      "api_key": "OBFUSCATED"
+    }
+  },
+  "location": {
+    "amer": [
+      "AWS_CA_CENTRAL_1",
+      "AWS_SA_EAST_1",
+      "AWS_US_EAST_1",
+      "AWS_US_EAST_2",
+      "AWS_US_WEST_1",
+      "AWS_US_WEST_2"
+    ],
+    "apac": [
+      "AWS_AP_EAST_1",
+      "AWS_AP_NORTHEAST_1",
+      "AWS_AP_NORTHEAST_2",
+      "AWS_AP_SOUTHEAST_1",
+      "AWS_AP_SOUTHEAST_2",
+      "AWS_AP_SOUTH_1"
+    ],
+    "emea": [
+      "AWS_AF_SOUTH_1",
+      "AWS_EU_CENTRAL_1",
+      "AWS_EU_NORTH_1",
+      "AWS_EU_SOUTH_1",
+      "AWS_EU_WEST_2",
+      "AWS_ME_SOUTH_1"
+    ]
+  }
+}
diff --git a/variables.tf b/variables.tf
new file mode 100644
index 0000000..2edf5e0
--- /dev/null
+++ b/variables.tf
@@ -0,0 +1,10 @@
+variable "account" {
+  type        = map(any)
+  description = "Account data"
+  sensitive   = true # redacts these values (they include api_key) in plan/apply output
+}
+
+variable "location" {
+  type        = map(any) # region group name => list of location identifiers
+  description = "Locations"
+}
diff --git a/environments/versions.tf b/versions.tf
similarity index 62%
rename from environments/versions.tf
rename to versions.tf
index 1bde982..33558be 100644
--- a/environments/versions.tf
+++ b/versions.tf
@@ -1,9 +1,9 @@
 terraform {
-  required_version = "~> 0.14.0"
+  required_version = "~> 1.0" # any Terraform 1.x release
   required_providers {
     newrelic = {
       source  = "newrelic/newrelic"
-      version = "~> 2.18.0"
+      version = "~> 2.0" # any 2.x provider release
     }
   }
 }