From edfff23a634152c02a75c78246d5784b0c2f75db Mon Sep 17 00:00:00 2001 From: Jake Heath <76011913+jakeyheath@users.noreply.github.com> Date: Mon, 30 Oct 2023 14:27:24 -0700 Subject: [PATCH 01/19] feat: allow more options when creating the trust relationship (#525) * feat: allow github actions to attach more assume role policies * more options --- aws-iam-role-github-action/main.tf | 23 ++++++++++++++++++++++- aws-iam-role-github-action/variables.tf | 12 ++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/aws-iam-role-github-action/main.tf b/aws-iam-role-github-action/main.tf index a4481fa0..e4ba9612 100644 --- a/aws-iam-role-github-action/main.tf +++ b/aws-iam-role-github-action/main.tf @@ -7,10 +7,24 @@ locals { // https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-amazon-web-services#adding-the-identity-provider-to-aws data "aws_iam_policy_document" "assume_role" { + dynamic "statement" { + for_each = var.authorized_aws_accounts + + content { + sid = "AllowAssumeRoleFrom${statement.key}" + principals { + type = "AWS" + identifiers = ["arn:aws:iam::${statement.value}:root"] + } + actions = ["sts:AssumeRole", "sts:TagSession"] + effect = "Allow" + } + } dynamic "statement" { for_each = var.authorized_github_repos content { + sid = "AllowGithubActionsToAssumeRole" principals { type = "Federated" identifiers = [local.idp_arn] @@ -30,12 +44,19 @@ data "aws_iam_policy_document" "assume_role" { } } +data "aws_iam_policy_document" "this" { + source_policy_documents = compact([ + data.aws_iam_policy_document.assume_role.json, + var.additional_assume_role_policies_json, + ]) +} + resource "aws_iam_role" "role" { name = var.role.name tags = var.tags - assume_role_policy = data.aws_iam_policy_document.assume_role.json + assume_role_policy = data.aws_iam_policy_document.this.json max_session_duration = 60 * 60 // 1 hour, not sure what max github action exec time is # We have to force detach policies in order to recreate roles. diff --git a/aws-iam-role-github-action/variables.tf b/aws-iam-role-github-action/variables.tf index f954df17..6c390296 100644 --- a/aws-iam-role-github-action/variables.tf +++ b/aws-iam-role-github-action/variables.tf @@ -25,3 +25,15 @@ variable "tags" { description = "Standard tagging." } + +variable "authorized_aws_accounts" { + type = map(string) + description = "The map of authorized AWS accounts to assume the created role."
+ default = {} +} + +variable "additional_assume_role_policies_json" { + type = string + description = "The JSON string of any other additional assume role policies to add to the Github Actions role" + default = "" +} \ No newline at end of file From 94001ec8e60a158db6f4ad330296592b8c1d9927 Mon Sep 17 00:00:00 2001 From: "czi-github-helper[bot]" <95879977+czi-github-helper[bot]@users.noreply.github.com> Date: Mon, 30 Oct 2023 14:28:36 -0700 Subject: [PATCH 02/19] chore(main): release 0.61.0 (#526) Co-authored-by: czi-github-helper[bot] <95879977+czi-github-helper[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ version.txt | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e73aa4a2..005dacb8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.61.0](https://github.com/chanzuckerberg/cztack/compare/v0.60.1...v0.61.0) (2023-10-30) + + +### Features + +* allow more options when creating the trust relationship ([#525](https://github.com/chanzuckerberg/cztack/issues/525)) ([edfff23](https://github.com/chanzuckerberg/cztack/commit/edfff23a634152c02a75c78246d5784b0c2f75db)) + ## [0.60.1](https://github.com/chanzuckerberg/cztack/compare/v0.60.0...v0.60.1) (2023-10-03) diff --git a/version.txt b/version.txt index c3063f35..0b094550 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.60.1 +0.61.0 From e527df1d2e6f189804543592dde715c8dcd4d574 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Mon, 30 Oct 2023 18:03:12 -0700 Subject: [PATCH 03/19] add metastore module (#528) --- databricks-metastore/README.md | 61 +++++++++++++ databricks-metastore/main.tf | 61 +++++++++++++ databricks-metastore/outputs.tf | 4 + databricks-metastore/provider.tf | 10 ++ databricks-metastore/s3.tf | 147 ++++++++++++++++++++++++++++++ databricks-metastore/variables.tf | 68 ++++++++++++++ databricks-metastore/versions.tf | 11 +++ 7 files changed, 362 insertions(+) create mode 100644 databricks-metastore/README.md create mode 100644 databricks-metastore/main.tf create mode 100644 databricks-metastore/outputs.tf create mode 100644 databricks-metastore/provider.tf create mode 100644 databricks-metastore/s3.tf create mode 100644 databricks-metastore/variables.tf create mode 100644 databricks-metastore/versions.tf diff --git a/databricks-metastore/README.md b/databricks-metastore/README.md new file mode 100644 index 00000000..b28d903e --- /dev/null +++ b/databricks-metastore/README.md @@ -0,0 +1,61 @@ + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.13 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | n/a | +| [databricks.workspace](#provider\_databricks.workspace) | n/a | + +## Modules + +No modules.
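As a usage sketch (editor's illustration, not part of the committed README): a root module might call `databricks-metastore` as shown below. The account ID, workspace URL/ID, and tag values are placeholder assumptions; the required inputs mirror the Inputs table further down.

```hcl
# Hypothetical invocation of the databricks-metastore module -- all values
# are placeholders. The module configures its own databricks providers
# (mws and workspace) from these inputs, so no providers block is needed.
module "metastore" {
  source = "github.com/chanzuckerberg/cztack//databricks-metastore" # pin with ?ref=<release-tag>

  databricks_external_id = "12345678-90ab-cdef-1234-567890abcdef" # Databricks account ID
  workspace_url          = "https://example.cloud.databricks.com"

  # workspace name -> workspace ID; each entry is assigned to the metastore
  workspaces = {
    example = "1234567890123456"
  }

  tags = {
    project = "example"
    env     = "dev"
    service = "databricks-metastore"
    owner   = "data-infra-admin"
  }
}
```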
+ +## Resources + +| Name | Type | +|------|------| +| [aws_iam_policy.metastore_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_policy_attachment.metastore_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy_attachment) | resource | +| [aws_iam_role.metastore_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_kms_alias.metastore_key_alias](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_alias) | resource | +| [aws_kms_key.metastore_key](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_key) | resource | +| [aws_s3_bucket.metastore](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket) | resource | +| [databricks_catalog.sandbox](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/catalog) | resource | +| [databricks_grants.admin](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grants) | resource | +| [databricks_grants.poweruser](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grants) | resource | +| [databricks_metastore.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/metastore) | resource | +| [databricks_metastore_assignment.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/metastore_assignment) | resource | +| [databricks_metastore_data_access.metastore_data_access](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/metastore_data_access) | resource | +| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_iam_policy_document.metastore_assumerole_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.metastore_role_access_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [admin\_groups](#input\_admin\_groups) | List of databricks groups to grant admin access for metastore; includes owner by default | `list(string)` | `[]` | no | +| [databricks\_external\_id](#input\_databricks\_external\_id) | External ID for Databricks account | `string` | n/a | yes | +| [deletion\_window\_in\_days](#input\_deletion\_window\_in\_days) | Deletion window in days for S3 encryption key | `number` | `7` | no | +| [delta\_sharing\_recipient\_token\_lifetime\_in\_seconds](#input\_delta\_sharing\_recipient\_token\_lifetime\_in\_seconds) | Lifetime of delta sharing recipient token in seconds | `number` | `3600` | no | +| [delta\_sharing\_scope](#input\_delta\_sharing\_scope) | Delta sharing scope | `string` | `"INTERNAL"` | no | +| [enable\_key\_rotation](#input\_enable\_key\_rotation) | Enable key rotation for S3 encryption key | `bool` | `true` | no | +| [force\_destroy](#input\_force\_destroy) | Force destroy metastore if data exists | `bool` | `false` | no | +| [owner](#input\_owner) | Owner of the metastore; should be a group display name | `string` | `"data-infra-admin"` | no | +| [powerusers](#input\_powerusers) | List of databricks 
groups to grant poweruser access for metastore | `list(string)` | `["powerusers"]`
| no | +| [tags](#input\_tags) | Fogg generated tags for the environment | `object({ project : string, env : string, service : string, owner : string })` | n/a | yes | +| [workspace\_url](#input\_workspace\_url) | URL of the workspace to use to create this metastore | `string` | n/a | yes | +| [workspaces](#input\_workspaces) | Map of workspace names to ids to associate with this metastore | `map(string)` | `{}` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [metastore\_id](#output\_metastore\_id) | ID of the metastore | + \ No newline at end of file diff --git a/databricks-metastore/main.tf b/databricks-metastore/main.tf new file mode 100644 index 00000000..964b2f37 --- /dev/null +++ b/databricks-metastore/main.tf @@ -0,0 +1,61 @@ +locals { + name = "${var.tags.project}-${var.tags.env}-${var.tags.service}" + admins = toset(concat(var.admin_groups, [var.owner])) + workspace_ids = values(var.workspaces) +} + +resource "databricks_metastore" "this" { + provider = databricks.workspace + name = "${local.name}-metastore" + storage_root = "s3://${aws_s3_bucket.metastore.id}/metastore" + owner = var.owner + delta_sharing_scope = var.delta_sharing_scope + delta_sharing_recipient_token_lifetime_in_seconds = var.delta_sharing_recipient_token_lifetime_in_seconds + force_destroy = var.force_destroy +} + +resource "databricks_metastore_assignment" "this" { + provider = databricks.workspace + for_each = toset(local.workspace_ids) + metastore_id = databricks_metastore.this.id + workspace_id = each.value +} + +resource "databricks_grants" "admin" { + for_each = local.admins + provider = databricks.workspace + metastore = databricks_metastore.this.id + grant { + principal = each.value + privileges = ["CREATE_CATALOG", "CREATE_EXTERNAL_LOCATION", "CREATE_SHARE", "CREATE_RECIPIENT", "CREATE_PROVIDER"] + } +} + +resource "databricks_grants" "poweruser" { + for_each = toset(var.powerusers) + provider = databricks.workspace + metastore = databricks_metastore.this.id + grant { + principal = each.value + privileges = ["CREATE_CATALOG", "CREATE_SHARE"] + } +} + +resource "databricks_metastore_data_access" "metastore_data_access" { + provider = databricks.workspace + depends_on = [databricks_metastore.this] + metastore_id = databricks_metastore.this.id + name = aws_iam_role.metastore_access.name + aws_iam_role { role_arn = aws_iam_role.metastore_access.arn } + is_default = true +} + +resource "databricks_catalog" "sandbox" { + provider = databricks.workspace + metastore_id = databricks_metastore.this.id + name = "sandbox" + comment = "this catalog is managed by terraform" + properties = { + purpose = "testing" + } +} \ No newline at end of file diff --git a/databricks-metastore/outputs.tf b/databricks-metastore/outputs.tf new file mode 100644 index 00000000..ec468927 --- /dev/null +++ b/databricks-metastore/outputs.tf @@ -0,0 +1,4 @@ +output "metastore_id" { + description = "ID of the metastore" + value = databricks_metastore.this.id +} diff --git a/databricks-metastore/provider.tf b/databricks-metastore/provider.tf new file mode 100644 index 00000000..50107158 --- /dev/null +++ b/databricks-metastore/provider.tf @@ -0,0 +1,10 @@ +provider "databricks" { + alias = "mws" + host = "https://accounts.cloud.databricks.com" + account_id = var.databricks_external_id +} + +provider "databricks" { + alias = "workspace" + host = var.workspace_url +} diff --git a/databricks-metastore/s3.tf b/databricks-metastore/s3.tf new file mode 100644 index 00000000..cb89ba40 --- /dev/null +++ 
b/databricks-metastore/s3.tf @@ -0,0 +1,147 @@ +## Sets up a metastore for use with Databricks Unity Catalog +## https://docs.databricks.com/data-governance/unity-catalog/get-started.html + +locals { + metastore_access_role_name = "${local.name}-access" +} + +## Bucket which will be used for the metastore, with KMS for encryption + +resource "aws_s3_bucket" "metastore" { + bucket = local.name + tags = var.tags + server_side_encryption_configuration { + rule { + apply_server_side_encryption_by_default { + kms_master_key_id = aws_kms_key.metastore_key.arn + sse_algorithm = "aws:kms" + } + } + } +} + +resource "aws_kms_key" "metastore_key" { + description = "KMS key for ${local.name}" + deletion_window_in_days = var.deletion_window_in_days + enable_key_rotation = var.enable_key_rotation + tags = var.tags +} + +resource "aws_kms_alias" "metastore_key_alias" { + name = "alias/${local.name}-key" + target_key_id = aws_kms_key.metastore_key.id +} + +## Allow Databricks role to assume our role + +data "aws_caller_identity" "current" {} + +data "aws_iam_policy_document" "metastore_assumerole_policy" { + statement { + effect = "Allow" + actions = [ + "sts:AssumeRole" + ] + principals { + type = "AWS" + identifiers = [ + # Default role for all databricks accounts https://docs.databricks.com/data-governance/unity-catalog/automate.html#configure-storage-for-a-metastore + "arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL" + ] + } + condition { + test = "StringEquals" + variable = "sts:ExternalId" + values = [ + # This is our non-education account number + var.databricks_external_id + ] + } + } + # AWS introduced a new change 6/30/23 that requires IAM policies to self-reference and allow the role to + # assume itself. We can't just use the arn as-is since the role might not exist yet + # https://docs.databricks.com/data-governance/unity-catalog/get-started.html#configure-a-storage-bucket-and-iam-role-in-aws + statement { + effect = "Allow" + actions = [ + "sts:AssumeRole" + ] + principals { + type = "AWS" + identifiers = [ + "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root" + ] + } + condition { + test = "ArnEquals" + variable = "aws:PrincipalArn" + values = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${local.metastore_access_role_name}"] + } + } +} + + +## Create role which will be assumed by Databricks Unity Catalog + +resource "aws_iam_role" "metastore_access" { + name = local.metastore_access_role_name + assume_role_policy = data.aws_iam_policy_document.metastore_assumerole_policy.json + tags = var.tags +} + + +## Allow our role access S3 and KMS + +resource "aws_iam_policy_attachment" "metastore_access" { + name = "${local.name}-policy" + roles = [aws_iam_role.metastore_access.name] + policy_arn = aws_iam_policy.metastore_access.arn +} + +resource "aws_iam_policy" "metastore_access" { + name = "${local.name}-s3-kms-access" + description = "Allow access to the ${local.name} bucket" + policy = data.aws_iam_policy_document.metastore_role_access_policy.json +} + +data "aws_iam_policy_document" "metastore_role_access_policy" { + statement { + sid = "S3RWBucketAccess" + effect = "Allow" + actions = [ + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + "s3:ListBucket", + "s3:GetBucketLocation", + "s3:GetLifecycleConfiguration", + "s3:PutLifecycleConfiguration" + ] + resources = [ + "arn:aws:s3:::${aws_s3_bucket.metastore.id}", + "arn:aws:s3:::${aws_s3_bucket.metastore.id}/*" + ] + } + statement { + sid = "KMSAccess" + effect = 
"Allow" + actions = [ + "kms:Decrypt", + "kms:Encrypt", + "kms:GenerateDataKey*" + ] + resources = [ + aws_kms_key.metastore_key.arn + ] + } + statement { + sid = "STSAssumeRoleAccess" + effect = "Allow" + actions = [ + "sts:AssumeRole" + ] + resources = [ + aws_iam_role.metastore_access.arn + ] + } +} \ No newline at end of file diff --git a/databricks-metastore/variables.tf b/databricks-metastore/variables.tf new file mode 100644 index 00000000..5ead2345 --- /dev/null +++ b/databricks-metastore/variables.tf @@ -0,0 +1,68 @@ +variable "databricks_external_id" { + type = string + description = "External ID for Databricks account" +} + +variable "tags" { + type = object({ project : string, env : string, service : string, owner : string }) + description = "Fogg generated tags for the environment" +} + +variable "deletion_window_in_days" { + type = number + description = "Deletion window in days for S3 encryption key" + default = 7 +} + +variable "enable_key_rotation" { + type = bool + description = "Enable key rotation for S3 encryption key" + default = true +} + +variable "delta_sharing_scope" { + type = string + description = "Delta sharing scope" + default = "INTERNAL" +} + +variable "delta_sharing_recipient_token_lifetime_in_seconds" { + type = number + description = "Lifetime of delta sharing recipient token in seconds" + default = 3600 +} + +variable "force_destroy" { + type = bool + description = "Force destroy metastore if data exists" + default = false +} + +variable "workspaces" { + type = map(string) + description = "Map of workspace names to ids to associate with this metastore" + default = {} +} + +variable "admin_groups" { + type = list(string) + description = "List of databricks groups to grant admin access for metastore; includes owner by default" + default = [] +} + +variable "owner" { + type = string + description = "Owner of the metastore; should be a group display name" + default = "data-infra-admin" +} + +variable "powerusers" { + type = list(string) + description = "List of databricks groups to grant poweruser access for metastore" + default = ["powerusers"] +} + +variable "workspace_url" { + type = string + description = "URL of the workspace to use to create this metastore" +} diff --git a/databricks-metastore/versions.tf b/databricks-metastore/versions.tf new file mode 100644 index 00000000..159e8002 --- /dev/null +++ b/databricks-metastore/versions.tf @@ -0,0 +1,11 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + } + databricks = { + source = "databricks/databricks" + } + } + required_version = ">= 1.3.0" +} From 67c02c7e17442001c5bd6f1e5ee50aee28d87b80 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Mon, 30 Oct 2023 18:10:53 -0700 Subject: [PATCH 04/19] add databricks-workspace module (#529) --- databricks-workspace-e2/README.md | 64 +++++ databricks-workspace-e2/aws_iam_role.tf | 282 ++++++++++++++++++++++ databricks-workspace-e2/bucket.tf | 33 +++ databricks-workspace-e2/main.tf | 48 ++++ databricks-workspace-e2/outputs.tf | 14 ++ databricks-workspace-e2/security_group.tf | 30 +++ databricks-workspace-e2/variables.tf | 63 +++++ databricks-workspace-e2/versions.tf | 11 + 8 files changed, 545 insertions(+) create mode 100644 databricks-workspace-e2/README.md create mode 100644 databricks-workspace-e2/aws_iam_role.tf create mode 100644 databricks-workspace-e2/bucket.tf create mode 100644 databricks-workspace-e2/main.tf create mode 100644 databricks-workspace-e2/outputs.tf create mode 100644 databricks-workspace-e2/security_group.tf create 
mode 100644 databricks-workspace-e2/variables.tf create mode 100644 databricks-workspace-e2/versions.tf diff --git a/databricks-workspace-e2/README.md b/databricks-workspace-e2/README.md new file mode 100644 index 00000000..9f879b4f --- /dev/null +++ b/databricks-workspace-e2/README.md @@ -0,0 +1,64 @@ +## References +* [Here](https://databrickslabs.github.io/terraform-provider-databricks/overview/) is the provider docs. + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.13 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | n/a | +| [databricks](#provider\_databricks) | n/a | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [databricks\_bucket](#module\_databricks\_bucket) | github.com/chanzuckerberg/cztack//aws-s3-private-bucket | v0.60.1 | + +## Resources + +| Name | Type | +|------|------| +| [aws_iam_role.databricks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role_policy.policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | +| [aws_security_group.databricks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | +| [databricks_mws_credentials.databricks](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_credentials) | resource | +| [databricks_mws_networks.networking](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_networks) | resource | +| [databricks_mws_storage_configurations.databricks](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_storage_configurations) | resource | +| [databricks_mws_workspaces.databricks](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_workspaces) | resource | +| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_iam_policy_document.databricks-s3](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.databricks-setup-assume-role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [audit\_log\_bucket\_name](#input\_audit\_log\_bucket\_name) | Name of bucket to write cluster logs to - also where the audit logs go, too | `string` | `"czi-audit-logs"` | no | +| [databricks\_external\_id](#input\_databricks\_external\_id) | The ID of a Databricks root account. | `string` | n/a | yes | +| [env](#input\_env) | The environment / stage. Aka staging, dev, prod. | `string` | n/a | yes | +| [object\_ownership](#input\_object\_ownership) | Set default owner of all objects within bucket (e.g., bucket vs. 
object owner) | `string` | `null` | no | +| [owner](#input\_owner) | n/a | `string` | n/a | yes | +| [passable\_role\_arn](#input\_passable\_role\_arn) | A role to allow the cross-account role to pass to other accounts | `string` | `""` | no | +| [private\_subnets](#input\_private\_subnets) | List of private subnets. | `list(string)` | n/a | yes | +| [project](#input\_project) | A high level name, typically the name of the site. | `string` | n/a | yes | +| [service](#input\_service) | The service. Aka databricks-workspace. | `string` | n/a | yes | +| [vpc\_id](#input\_vpc\_id) | ID of the VPC. | `string` | n/a | yes | +| [workspace\_name\_override](#input\_workspace\_name\_override) | Override the workspace name. If not set, the workspace name will be set to the project, env, and service. | `string` | `null` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [role\_arn](#output\_role\_arn) | ARN of the AWS IAM role. | +| [workspace\_id](#output\_workspace\_id) | ID of the workspace. | +| [workspace\_url](#output\_workspace\_url) | Url of the deployed workspace. | + diff --git a/databricks-workspace-e2/aws_iam_role.tf b/databricks-workspace-e2/aws_iam_role.tf new file mode 100644 index 00000000..6dd00cfd --- /dev/null +++ b/databricks-workspace-e2/aws_iam_role.tf @@ -0,0 +1,282 @@ +locals { + cluster_log_bucket_prefix = "databricks-cluster-logs" +} + +data "aws_iam_policy_document" "databricks-setup-assume-role" { + statement { + principals { + type = "AWS" + identifiers = ["arn:aws:iam::${local.databricks_aws_account}:root"] + } + + actions = ["sts:AssumeRole"] + condition { + test = "StringLike" + variable = "sts:ExternalId" + values = [var.databricks_external_id] + } + } +} + +resource "aws_iam_role" "databricks" { + name = local.name + assume_role_policy = data.aws_iam_policy_document.databricks-setup-assume-role.json + tags = local.tags +} + +data "aws_iam_policy_document" "policy" { + statement { + sid = "NonResourceBasedPermissions" + actions = [ + "ec2:CancelSpotInstanceRequests", + "ec2:DescribeAvailabilityZones", + "ec2:DescribeIamInstanceProfileAssociations", + "ec2:DescribeInstanceStatus", + "ec2:DescribeInstances", + "ec2:DescribeInternetGateways", + "ec2:DescribeNatGateways", + "ec2:DescribeNetworkAcls", + "ec2:DescribePlacementGroups", + "ec2:DescribePrefixLists", + "ec2:DescribeReservedInstancesOfferings", + "ec2:DescribeRouteTables", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSpotInstanceRequests", + "ec2:DescribeSpotPriceHistory", + "ec2:DescribeSubnets", + "ec2:DescribeVolumes", + "ec2:DescribeVpcAttribute", + "ec2:DescribeVpcs", + "ec2:CreatePlacementGroup", + "ec2:DeletePlacementGroup", + "ec2:CreateKeyPair", + "ec2:DeleteKeyPair", + "ec2:CreateTags", + "ec2:DeleteTags", + "ec2:RequestSpotInstances", + ] + resources = ["*"] + effect = "Allow" + } + + statement { + effect = "Allow" + actions = ["iam:PassRole"] + resources = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/databricks/*"] + } + + dynamic "statement" { + for_each = length(var.passable_role_arn) > 0 ? 
[1] : [] + + content { + actions = [ + "iam:PassRole" + ] + resources = [ + var.passable_role_arn + ] + } + } + + statement { + sid = "InstancePoolsSupport" + actions = [ + "ec2:AssociateIamInstanceProfile", + "ec2:DisassociateIamInstanceProfile", + "ec2:ReplaceIamInstanceProfileAssociation", + ] + + resources = ["${local.ec2_arn_base}:instance/*"] + + condition { + test = "StringEquals" + variable = "ec2:ResourceTag/Vendor" + values = ["Databricks"] + } + } + + statement { + sid = "AllowEc2RunInstancePerTag" + actions = [ + "ec2:RunInstances", + ] + + resources = [ + "${local.ec2_arn_base}:instance/*", + "${local.ec2_arn_base}:volume/*", + ] + + condition { + test = "StringEquals" + variable = "aws:RequestTag/Vendor" + values = ["Databricks"] + } + } + + statement { + sid = "AllowEc2RunInstanceImagePerTag" + actions = [ + "ec2:RunInstances", + ] + + resources = [ + "${local.ec2_arn_base}:image/*", + ] + + condition { + test = "StringEquals" + variable = "aws:ResourceTag/Vendor" + values = ["Databricks"] + } + } + + statement { + sid = "AllowEc2RunInstancePerVPCid" + actions = [ + "ec2:RunInstances", + ] + + resources = [ + "${local.ec2_arn_base}:network-interface/*", + "${local.ec2_arn_base}:subnet/*", + "${local.ec2_arn_base}:security-group/*", + ] + + condition { + test = "StringEquals" + variable = "ec2:vpc" + values = ["${local.ec2_arn_base}:vpc/${var.vpc_id}"] + } + } + + statement { + sid = "AllowEc2RunInstanceOtherResources" + actions = [ + "ec2:RunInstances", + ] + + not_resources = [ + "${local.ec2_arn_base}:image/*", + "${local.ec2_arn_base}:network-interface/*", + "${local.ec2_arn_base}:subnet/*", + "${local.ec2_arn_base}:security-group/*", + "${local.ec2_arn_base}:volume/*", + "${local.ec2_arn_base}:instance/*" + ] + } + + statement { + sid = "EC2TerminateInstancesTag" + actions = [ + "ec2:TerminateInstances", + ] + + resources = [ + "${local.ec2_arn_base}:instance/*", + ] + + condition { + test = "StringEquals" + variable = "ec2:ResourceTag/Vendor" + values = ["Databricks"] + } + } + + statement { + sid = "EC2AttachDetachVolumeTag" + actions = [ + "ec2:AttachVolume", + "ec2:DetachVolume", + ] + + resources = [ + "${local.ec2_arn_base}:instance/*", + "${local.ec2_arn_base}:volume/*", + ] + + condition { + test = "StringEquals" + variable = "ec2:ResourceTag/Vendor" + values = ["Databricks"] + } + } + + statement { + sid = "EC2CreateVolumeByTag" + actions = [ + "ec2:CreateVolume", + ] + + resources = [ + "${local.ec2_arn_base}:volume/*", + ] + + condition { + test = "StringEquals" + variable = "aws:RequestTag/Vendor" + values = ["Databricks"] + } + } + + statement { + sid = "EC2DeleteVolumeByTag" + actions = [ + "ec2:DeleteVolume", + ] + + resources = [ + "${local.ec2_arn_base}:volume/*", + ] + + condition { + test = "StringEquals" + variable = "ec2:ResourceTag/Vendor" + values = ["Databricks"] + } + } + + statement { + actions = [ + "iam:CreateServiceLinkedRole", + "iam:PutRolePolicy", + ] + + resources = [ + "arn:aws:iam::*:role/aws-service-role/spot.amazonaws.com/AWSServiceRoleForEC2Spot", + ] + + condition { + test = "StringLike" + variable = "iam:AWSServiceName" + values = ["spot.amazonaws.com"] + } + + effect = "Allow" + } + + statement { + sid = "VpcNonresourceSpecificActions" + actions = [ + "ec2:AuthorizeSecurityGroupEgress", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:RevokeSecurityGroupEgress", + "ec2:RevokeSecurityGroupIngress", + ] + + resources = [ + "${local.ec2_arn_base}:security-group/${aws_security_group.databricks.id}", + ] + + condition { + test = 
"StringEquals" + variable = "ec2:vpc" + values = ["${local.ec2_arn_base}:vpc/${var.vpc_id}"] + } + } +} + +resource "aws_iam_role_policy" "policy" { + name = "extras" + role = aws_iam_role.databricks.id + policy = data.aws_iam_policy_document.policy.json +} diff --git a/databricks-workspace-e2/bucket.tf b/databricks-workspace-e2/bucket.tf new file mode 100644 index 00000000..b2095387 --- /dev/null +++ b/databricks-workspace-e2/bucket.tf @@ -0,0 +1,33 @@ +data "aws_iam_policy_document" "databricks-s3" { + statement { + sid = "grant databricks access" + effect = "Allow" + principals { + type = "AWS" + identifiers = ["arn:aws:iam::${local.databricks_aws_account}:root"] + } + actions = [ + "s3:GetObject", + "s3:GetObjectVersion", + "s3:PutObject", + "s3:DeleteObject", + "s3:ListBucket", + "s3:GetBucketLocation", + ] + resources = [ + "arn:aws:s3:::${local.name}/*", + "arn:aws:s3:::${local.name}", + ] + } +} + +module "databricks_bucket" { + source = "github.com/chanzuckerberg/cztack//aws-s3-private-bucket?ref=v0.60.1" + bucket_name = local.name + bucket_policy = data.aws_iam_policy_document.databricks-s3.json + project = var.project + env = var.env + service = var.service + owner = var.owner + object_ownership = var.object_ownership +} diff --git a/databricks-workspace-e2/main.tf b/databricks-workspace-e2/main.tf new file mode 100644 index 00000000..e0393532 --- /dev/null +++ b/databricks-workspace-e2/main.tf @@ -0,0 +1,48 @@ +// https://docs.databricks.com/administration-guide/multiworkspace/iam-role.html#language-Your%C2%A0VPC,%C2%A0custom +locals { + databricks_aws_account = "414351767826" # Databricks' own AWS account, not CZI's. See https://docs.databricks.com/en/administration-guide/account-settings-e2/credentials.html#step-1-create-a-cross-account-iam-role + ec2_arn_base = "arn:aws:ec2:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}" + name = coalesce(var.workspace_name_override, "${var.project}-${var.env}-${var.service}") + security_group_ids = [aws_security_group.databricks.id] + tags = { + project = var.project + env = var.env + service = var.service + owner = var.owner + managedBy = "terraform" + } +} + +data "aws_caller_identity" "current" {} + +data "aws_region" "current" {} + +resource "databricks_mws_networks" "networking" { + account_id = var.databricks_external_id + network_name = local.name + vpc_id = var.vpc_id + subnet_ids = var.private_subnets + security_group_ids = local.security_group_ids +} + +resource "databricks_mws_storage_configurations" "databricks" { + account_id = var.databricks_external_id + storage_configuration_name = local.name + bucket_name = module.databricks_bucket.id +} + +resource "databricks_mws_credentials" "databricks" { + account_id = var.databricks_external_id + credentials_name = local.name + role_arn = aws_iam_role.databricks.arn +} + +resource "databricks_mws_workspaces" "databricks" { + account_id = var.databricks_external_id + workspace_name = local.name + deployment_name = local.name + aws_region = data.aws_region.current.name + credentials_id = databricks_mws_credentials.databricks.credentials_id + storage_configuration_id = databricks_mws_storage_configurations.databricks.storage_configuration_id + network_id = databricks_mws_networks.networking.network_id +} diff --git a/databricks-workspace-e2/outputs.tf b/databricks-workspace-e2/outputs.tf new file mode 100644 index 00000000..fb972c47 --- /dev/null +++ b/databricks-workspace-e2/outputs.tf @@ -0,0 +1,14 @@ +output "workspace_id" { + description = "ID of the 
workspace." + value = databricks_mws_workspaces.databricks.workspace_id +} + +output "workspace_url" { + description = "Url of the deployed workspace." + value = databricks_mws_workspaces.databricks.workspace_url +} + +output "role_arn" { + description = "ARN of the AWS IAM role." + value = aws_iam_role.databricks.arn +} diff --git a/databricks-workspace-e2/security_group.tf b/databricks-workspace-e2/security_group.tf new file mode 100644 index 00000000..3a2c9cf9 --- /dev/null +++ b/databricks-workspace-e2/security_group.tf @@ -0,0 +1,30 @@ +resource "aws_security_group" "databricks" { + name = local.name + description = "self tcp and udp all ports and all outbound" + vpc_id = var.vpc_id + + ingress { + description = "self tcp all ports" + from_port = 0 + to_port = 65535 + protocol = "tcp" + self = true + } + + ingress { + description = "self udp all ports" + from_port = 0 + to_port = 65535 + protocol = "udp" + self = true + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + + tags = local.tags +} diff --git a/databricks-workspace-e2/variables.tf b/databricks-workspace-e2/variables.tf new file mode 100644 index 00000000..7eb3f9d2 --- /dev/null +++ b/databricks-workspace-e2/variables.tf @@ -0,0 +1,63 @@ +variable "vpc_id" { + description = "ID of the VPC." + type = string +} + +variable "private_subnets" { + description = "List of private subnets." + type = list(string) +} + +variable "databricks_external_id" { + description = "The ID of a Databricks root account." + type = string +} + +variable "project" { + description = "A high level name, typically the name of the site." + type = string +} + +variable "env" { + description = "The environment / stage. Aka staging, dev, prod." + type = string +} + +variable "service" { + description = "The service. Aka databricks-workspace." + type = string +} + +variable "owner" { + type = string +} + +variable "passable_role_arn" { + description = "A role to allow the cross-account role to pass to other accounts" + type = string + default = "" +} + +# check if argument is null or is in list (2nd parameter of contains() cannot be null) +variable "object_ownership" { + type = string + default = null + description = "Set default owner of all objects within bucket (e.g., bucket vs. object owner)" + + validation { + condition = var.object_ownership == null ? true : contains(["BucketOwnerEnforced", "BucketOwnerPreferred", "ObjectWriter"], var.object_ownership) + error_message = "Valid values for var.object_ownership are ('BucketOwnerEnforced', 'BucketOwnerPreferred', 'ObjectWriter')." + + } +} + +variable "audit_log_bucket_name" { + type = string + description = "Name of bucket to write cluster logs to - also where the audit logs go, too" +} + +variable "workspace_name_override" { + type = string + default = null + description = "Override the workspace name. If not set, the workspace name will be set to the project, env, and service." 
+} \ No newline at end of file diff --git a/databricks-workspace-e2/versions.tf b/databricks-workspace-e2/versions.tf new file mode 100644 index 00000000..159e8002 --- /dev/null +++ b/databricks-workspace-e2/versions.tf @@ -0,0 +1,11 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + } + databricks = { + source = "databricks/databricks" + } + } + required_version = ">= 1.3.0" +} From 2e5974a61defa36d339a1a28ce7c90a17bd22685 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Tue, 31 Oct 2023 13:12:50 -0700 Subject: [PATCH 05/19] feat: CDI-2183 Add databricks-cluster-log-permissions module (#532) * Add databricks-cluster-log-permissions module * bump TF version * Fix comments --- databricks-cluster-log-permissions/README.md | 61 ++++++ databricks-cluster-log-permissions/main.tf | 200 ++++++++++++++++++ databricks-cluster-log-permissions/outputs.tf | 14 ++ .../providers.tf | 10 + .../variables.tf | 46 ++++ .../versions.tf | 8 + 6 files changed, 339 insertions(+) create mode 100644 databricks-cluster-log-permissions/README.md create mode 100644 databricks-cluster-log-permissions/main.tf create mode 100644 databricks-cluster-log-permissions/outputs.tf create mode 100644 databricks-cluster-log-permissions/providers.tf create mode 100644 databricks-cluster-log-permissions/variables.tf create mode 100644 databricks-cluster-log-permissions/versions.tf diff --git a/databricks-cluster-log-permissions/README.md b/databricks-cluster-log-permissions/README.md new file mode 100644 index 00000000..055a1808 --- /dev/null +++ b/databricks-cluster-log-permissions/README.md @@ -0,0 +1,61 @@ +# README + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.13 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | n/a | +| [aws.czi-logs](#provider\_aws.czi-logs) | n/a | +| [databricks](#provider\_databricks) | n/a | + +## Modules + +No modules. 
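Before the generated reference, a brief usage sketch (editor's illustration; every ID, name, and ARN below is a placeholder assumption). The module grants cluster-log access in the current workspace account, while the destination_* inputs configure the module's internal aws.logs_destination provider for the account that owns the logs bucket and KMS key.

```hcl
# Hypothetical invocation -- placeholder values throughout. Assumes default
# aws and databricks providers are already configured for the workspace
# account; the logs-destination provider is built inside the module.
module "cluster_log_permissions" {
  source = "github.com/chanzuckerberg/cztack//databricks-cluster-log-permissions" # pin with ?ref=<release-tag>

  env               = "dev"
  global_reader_env = "ie"  # add_reader may only be true when env matches this
  add_reader        = false

  databricks_logs_bucket_name   = "example-databricks-logs"
  bucket_kms_encryption_key_arn = "arn:aws:kms:us-west-2:111122223333:key/1234abcd-12ab-34cd-56ef-1234567890ab"

  # cross-account wiring for the account that owns the logs bucket
  destination_account_id               = "111122223333"
  destination_account_region           = "us-west-2"
  destination_account_assume_role_name = "example-logs-admin"

  # other existing instance-profile roles that should also get write access
  existing_role_names = []
}
```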
+ +## Resources + +| Name | Type | +|------|------| +| [aws_iam_instance_profile.cluster_log_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_instance_profile) | resource | +| [aws_iam_instance_profile.cluster_log_cluster_rw](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_instance_profile) | resource | +| [aws_iam_policy.cluster_log_bucket_read_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_policy.cluster_log_bucket_write_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_role.cluster_log_cluster_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role.cluster_log_rw_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role_policy_attachment.additional_write_access_attachment](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_role_policy_attachment.read_access_attachment](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_role_policy_attachment.write_access_attachment_default_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_role_policy_attachment.write_access_attachment_rw_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_kms_grant.additional_bucket_kms_encryption_key_grant](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_grant) | resource | +| [aws_kms_grant.bucket_kms_encryption_key_grant_default](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_grant) | resource | +| [aws_kms_grant.bucket_kms_encryption_key_grant_rw](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_grant) | resource | +| [databricks_instance_profile.cluster_log_cluster](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/instance_profile) | resource | +| [databricks_instance_profile.cluster_log_cluster_rw](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/instance_profile) | resource | +| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_iam_policy_document.assume_role_for_cluster_log_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.cluster_log_bucket_read_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.cluster_log_bucket_write_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [add\_reader](#input\_add\_reader) | Flag to add reader role for logs - should only be invoked for the ie workspace | `bool` | `false` | no | +| [bucket\_kms\_encryption\_key\_arn](#input\_bucket\_kms\_encryption\_key\_arn) | ARN for KMS 
key used to encrypt bucket for cluster logs | `string` | n/a | yes | +| [env](#input\_env) | Environment name | `string` | n/a | yes | +| [existing\_role\_names](#input\_existing\_role\_names) | List of other existing instance policy roles on the workspace for which to add cluster log write permissions | `list(string)` | `[]` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [default\_logging\_role\_arn](#output\_default\_logging\_role\_arn) | ARN of the AWS IAM role created for default logs access | +| [rw\_logging\_role\_arn](#output\_rw\_logging\_role\_arn) | ARN of the AWS IAM role created for read and write logs access | +| [rw\_logging\_role\_instance\_profile\_arn](#output\_rw\_logging\_role\_instance\_profile\_arn) | ARN of the AWS instance profile created for read and write logs access | + \ No newline at end of file diff --git a/databricks-cluster-log-permissions/main.tf b/databricks-cluster-log-permissions/main.tf new file mode 100644 index 00000000..d88ee5f7 --- /dev/null +++ b/databricks-cluster-log-permissions/main.tf @@ -0,0 +1,200 @@ +# - Creates a standard instance policy to allow clusters to write cluster logs to a destination S3 bucket +# - For a given list of instance profiles, also appends a policy attachment to allow them to write cluster logs, too + +### +locals { + default_role_name = "cluster_log_cluster_role" # standard role for clusters - allows both writing and reading cluster logs for only the same workspace + read_write_role_name = "cluster_log_rw_role" # special role - allows both writing and reading cluster logs for all workspaces + path = "/databricks/" + + # hacky way to validate if this workspace/cluster should have read permissions + # tflint-ignore: terraform_unused_declarations + validate_add_reader = (var.add_reader == true && var.env != var.global_reader_env) ? tobool("add_reader is not supported for this environment") : true + + databricks_bucket_cluster_log_prefix = "cluster-logs" + + # kms grants - all roles can read and write + read_write_operations = ["Encrypt", "GenerateDataKey", "Decrypt"] +} + +data "aws_iam_policy_document" "assume_role_for_cluster_log_cluster" { + statement { + effect = "Allow" + actions = ["sts:AssumeRole"] + principals { + identifiers = ["ec2.amazonaws.com"] + type = "Service" + } + } +} +resource "aws_iam_role" "cluster_log_cluster_role" { + name = local.default_role_name + path = local.path + description = "Role for cluster to write to cluster log bucket" + assume_role_policy = data.aws_iam_policy_document.assume_role_for_cluster_log_cluster.json +} + +resource "aws_iam_role" "cluster_log_rw_role" { + count = var.add_reader == true ? 
1 : 0 + + name = local.read_write_role_name + path = local.path + description = "Role for cluster to read from and write to cluster log bucket" + assume_role_policy = data.aws_iam_policy_document.assume_role_for_cluster_log_cluster.json +} + +### +## write and limited read access +data "aws_iam_policy_document" "cluster_log_bucket_write_access" { + statement { + sid = "ReadWriteClusterLogs" + actions = [ + "s3:PutObject", + "s3:PutObjectAcl", + "s3:GetObject", + "s3:ListBucket", + "s3:GetBucketLocation" + ] + + resources = [ + "arn:aws:s3:::${var.databricks_logs_bucket_name}/${local.databricks_bucket_cluster_log_prefix}/*", + "arn:aws:s3:::${var.databricks_logs_bucket_name}" + ] + } + statement { + sid = "ReadWriteEncryptedClusterLogs" + actions = [ + "kms:Encrypt", + "kms:Decrypt", + "kms:GenerateDataKey", + ] + + resources = [ + var.bucket_kms_encryption_key_arn + ] + } +} + +resource "aws_iam_policy" "cluster_log_bucket_write_access" { + name = "cluster_log_bucket_write_access_policy" + path = local.path + policy = data.aws_iam_policy_document.cluster_log_bucket_write_access.json +} + +resource "aws_iam_role_policy_attachment" "write_access_attachment_default_role" { + policy_arn = aws_iam_policy.cluster_log_bucket_write_access.arn + role = local.default_role_name +} + +resource "aws_iam_role_policy_attachment" "write_access_attachment_rw_role" { + count = var.add_reader == true ? 1 : 0 + + policy_arn = aws_iam_policy.cluster_log_bucket_write_access.arn + role = local.read_write_role_name +} + +## non-standard global-read access + +data "aws_iam_policy_document" "cluster_log_bucket_read_access" { + count = var.add_reader == true ? 1 : 0 + + statement { + sid = "ReadAllClusterLogs" + actions = [ + "s3:GetObject", + "s3:GetObjectVersion" + ] + + resources = [ + "arn:aws:s3:::${var.databricks_logs_bucket_name}/*", + "arn:aws:s3:::${var.databricks_logs_bucket_name}" + ] + } +} + +resource "aws_iam_policy" "cluster_log_bucket_read_access" { + count = var.add_reader == true ? 1 : 0 + + name = "cluster_log_bucket_read_access_policy" + path = local.path + policy = data.aws_iam_policy_document.cluster_log_bucket_read_access[0].json +} + +resource "aws_iam_role_policy_attachment" "read_access_attachment" { + count = var.add_reader == true ? 1 : 0 + + policy_arn = aws_iam_policy.cluster_log_bucket_read_access[0].arn + role = local.read_write_role_name +} + +## kms access + +data "aws_caller_identity" "current" { + provider = aws +} + +resource "aws_kms_grant" "bucket_kms_encryption_key_grant_default" { + provider = aws.logs_destination + + name = "cluster-log-kms-grant-${data.aws_caller_identity.current.account_id}-write" + key_id = var.bucket_kms_encryption_key_arn + grantee_principal = aws_iam_role.cluster_log_cluster_role.arn + operations = local.read_write_operations +} + +resource "aws_kms_grant" "bucket_kms_encryption_key_grant_rw" { + count = var.add_reader == true ? 
1 : 0 + provider = aws.logs_destination + + name = "cluster-log-kms-grant-${data.aws_caller_identity.current.account_id}-read-write" + key_id = var.bucket_kms_encryption_key_arn + grantee_principal = aws_iam_role.cluster_log_rw_role[0].arn + operations = local.read_write_operations +} + +## standard instance profile(s) + +resource "aws_iam_instance_profile" "cluster_log_cluster" { + name = "cluster-log-cluster-instance-profile" + path = local.path + role = aws_iam_role.cluster_log_cluster_role.name +} + +resource "databricks_instance_profile" "cluster_log_cluster" { + depends_on = [aws_iam_instance_profile.cluster_log_cluster] + instance_profile_arn = aws_iam_instance_profile.cluster_log_cluster.arn +} + +resource "aws_iam_instance_profile" "cluster_log_cluster_rw" { + count = var.add_reader == true ? 1 : 0 + + name = "cluster-log-rw-instance-profile" + path = local.path + role = aws_iam_role.cluster_log_rw_role[0].name +} + +resource "databricks_instance_profile" "cluster_log_cluster_rw" { + count = var.add_reader == true ? 1 : 0 + + depends_on = [aws_iam_instance_profile.cluster_log_cluster_rw] + instance_profile_arn = aws_iam_instance_profile.cluster_log_cluster_rw[0].arn +} + +## attach policies to given list of existing instance profiles + +resource "aws_iam_role_policy_attachment" "additional_write_access_attachment" { + for_each = toset(var.existing_role_names) + + policy_arn = aws_iam_policy.cluster_log_bucket_write_access.arn + role = each.value +} + +resource "aws_kms_grant" "additional_bucket_kms_encryption_key_grant" { + for_each = toset(var.existing_role_names) + provider = aws.logs_destination + + name = "cluster-log-kms-grant-${data.aws_caller_identity.current.account_id}" + key_id = var.bucket_kms_encryption_key_arn + grantee_principal = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/databricks/${each.value}" + operations = local.read_write_operations +} \ No newline at end of file diff --git a/databricks-cluster-log-permissions/outputs.tf b/databricks-cluster-log-permissions/outputs.tf new file mode 100644 index 00000000..7612fc1e --- /dev/null +++ b/databricks-cluster-log-permissions/outputs.tf @@ -0,0 +1,14 @@ +output "default_logging_role_arn" { + description = "ARN of the AWS IAM role created for default logs access" + value = aws_iam_role.cluster_log_cluster_role.arn +} + +output "rw_logging_role_arn" { + description = "ARN of the AWS IAM role created for read and write logs access" + value = one(aws_iam_role.cluster_log_rw_role[*].arn) +} + +output "rw_logging_role_instance_profile_arn" { + description = "ARN of the AWS instance profile created for read and write logs access" + value = one(aws_iam_instance_profile.cluster_log_cluster_rw[*].arn) +} \ No newline at end of file diff --git a/databricks-cluster-log-permissions/providers.tf b/databricks-cluster-log-permissions/providers.tf new file mode 100644 index 00000000..49695529 --- /dev/null +++ b/databricks-cluster-log-permissions/providers.tf @@ -0,0 +1,10 @@ +provider "aws" { + alias = "logs_destination" + region = var.destination_account_region + + assume_role { + role_arn = "arn:aws:iam::${var.destination_account_id}:role/${var.destination_account_assume_role_name}" + } + + allowed_account_ids = [var.destination_account_id] +} \ No newline at end of file diff --git a/databricks-cluster-log-permissions/variables.tf b/databricks-cluster-log-permissions/variables.tf new file mode 100644 index 00000000..f349ffa1 --- /dev/null +++ b/databricks-cluster-log-permissions/variables.tf @@ -0,0 +1,46 @@ 
+variable "env" { + description = "Environment name" + type = string +} + +variable "add_reader" { + description = "Flag to add reader role for logs - should only be invoked for the ie workspace" + type = bool + default = false +} + +variable "bucket_kms_encryption_key_arn" { + description = "ARN for KMS key used to encrypt bucket for cluster logs" + type = string +} + +variable "existing_role_names" { + description = "List of other existing instance policy roles on the workspace for which to add cluster log write permissions" + type = list(string) + default = [] +} + +variable "databricks_logs_bucket_name" { + description = "Name of the bucket to store cluster logs" + type = string +} + +variable "global_reader_env" { + description = "Name of env to grant global logs reader access to" + type = string +} + +variable "destination_account_id" { + description = "Account ID for the logs destination AWS account" + type = string +} + +variable "destination_account_region" { + description = "Region for the logs destination AWS account" + type = string +} + +variable "destination_account_assume_role_name" { + description = "Role name to assume in the logs destination AWS account" + type = string +} \ No newline at end of file diff --git a/databricks-cluster-log-permissions/versions.tf b/databricks-cluster-log-permissions/versions.tf new file mode 100644 index 00000000..2106c4d3 --- /dev/null +++ b/databricks-cluster-log-permissions/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + databricks = { + source = "databricks/databricks" + } + } + required_version = ">= 1.3.0" +} From 5f42e9bbb2eafdbde5a3afbc0d0fc1aa6d4093b9 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Tue, 31 Oct 2023 13:13:06 -0700 Subject: [PATCH 06/19] add databricks-cluster-policy (#530) --- databricks-cluster-policy/README.md | 42 +++++++++++++++++ databricks-cluster-policy/main.tf | 63 ++++++++++++++++++++++++++ databricks-cluster-policy/outputs.tf | 0 databricks-cluster-policy/variables.tf | 32 +++++++++++++ databricks-cluster-policy/versions.tf | 8 ++++ 5 files changed, 145 insertions(+) create mode 100644 databricks-cluster-policy/README.md create mode 100644 databricks-cluster-policy/main.tf create mode 100644 databricks-cluster-policy/outputs.tf create mode 100644 databricks-cluster-policy/variables.tf create mode 100644 databricks-cluster-policy/versions.tf diff --git a/databricks-cluster-policy/README.md b/databricks-cluster-policy/README.md new file mode 100644 index 00000000..2da77c8f --- /dev/null +++ b/databricks-cluster-policy/README.md @@ -0,0 +1,42 @@ +# README + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.13 | + +## Providers + +| Name | Version | +|------|---------| +| [databricks](#provider\_databricks) | n/a | + +## Modules + +No modules. 
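A usage sketch before the generated tables (editor's illustration): the relative source path follows how the databricks-default-cluster-policies module later in this series references this module; the family ID, override, and group name are placeholder assumptions.

```hcl
# Hypothetical usage -- placeholder values. With policy_family_id set, the
# module creates a policy inheriting from a Databricks policy family; leave
# it null to build a fully custom definition from policy_overrides alone.
module "small_personal_compute" {
  source = "../databricks-cluster-policy"

  policy_name             = "small-personal-compute"
  databricks_workspace_id = "1234567890123456"
  databricks_host         = "https://example.cloud.databricks.com"

  policy_family_id = "personal-vm" # assumed: the Personal Compute policy family
  policy_overrides = {
    # merged over the module's fixed custom_tags defaults
    "node_type_id" : { "type" : "allowlist", "values" : ["m5.large", "m5.xlarge"] }
  }

  grantees = ["data-scientists"] # groups must already exist in the workspace
}
```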
+ +## Resources + +| Name | Type | +|------|------| +| [databricks_cluster_policy.custom_cluster_policy](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/cluster_policy) | resource | +| [databricks_cluster_policy.inherited_cluster_policy](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/cluster_policy) | resource | +| [databricks_permissions.can_use_custom_cluster_policy](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/permissions) | resource | +| [databricks_permissions.can_use_inherited_cluster_policy](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/permissions) | resource | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [databricks\_host](#input\_databricks\_host) | Databricks host name for tagging | `string` | n/a | yes | +| [databricks\_workspace\_id](#input\_databricks\_workspace\_id) | Databricks workspace\_id for tagging | `string` | n/a | yes | +| [grantees](#input\_grantees) | Names of groups to be granted use access to the policy - must already exist | `list(string)` | `[]` | no | +| [policy\_family\_id](#input\_policy\_family\_id) | ID of policy family to inherit from | `string` | `null` | no | +| [policy\_name](#input\_policy\_name) | Name of cluster policy | `string` | n/a | yes | +| [policy\_overrides](#input\_policy\_overrides) | Cluster policy overrides | `any` | `{}` | no | + +## Outputs + +No outputs. + \ No newline at end of file diff --git a/databricks-cluster-policy/main.tf b/databricks-cluster-policy/main.tf new file mode 100644 index 00000000..60ce2c6c --- /dev/null +++ b/databricks-cluster-policy/main.tf @@ -0,0 +1,63 @@ +locals { + # default policy attributes that can be overridden but are otherwise + # included for each policy + default_policy = { + "custom_tags.Cluster_Policy" : { + "type" : "fixed", + "value" : var.policy_name + }, + "custom_tags.Databricks_Workspace_Id" : { + "type" : "fixed", + "value" : var.databricks_workspace_id + }, + "custom_tags.Databricks_Host" : { + "type" : "fixed", + "value" : var.databricks_host + }, + } + + # Workaround for looping over grantees and setting resource count + inherited_cluster_policy_grantees = toset([for grantee in var.grantees : grantee if var.policy_family_id != null]) + custom_cluster_policy_grantees = toset([for grantee in var.grantees : grantee if var.policy_family_id == null]) +} + +## Messy implementation below - cannot set policy_family_id and/or policy_family_definition_overrides +## if definition is present, and setting them to null still triggers an error from the provider, so +## we duplicate the setup and set a count on the var being present + +### if inherited cluster policy +resource "databricks_cluster_policy" "inherited_cluster_policy" { + count = var.policy_family_id != null ? 1 : 0 + + name = var.policy_name + policy_family_definition_overrides = jsonencode(merge(local.default_policy, var.policy_overrides)) + policy_family_id = var.policy_family_id +} + +resource "databricks_permissions" "can_use_inherited_cluster_policy" { + for_each = local.inherited_cluster_policy_grantees + + cluster_policy_id = databricks_cluster_policy.inherited_cluster_policy[0].id + access_control { + group_name = each.value + permission_level = "CAN_USE" + } +} + +### if custom cluster policy +resource "databricks_cluster_policy" "custom_cluster_policy" { + count = var.policy_family_id == null ?
1 : 0 + + name = var.policy_name + definition = jsonencode(merge(local.default_policy, var.policy_overrides)) +} + +resource "databricks_permissions" "can_use_custom_cluster_policy" { + for_each = local.custom_cluster_policy_grantees + + cluster_policy_id = databricks_cluster_policy.custom_cluster_policy[0].id + access_control { + group_name = each.value + permission_level = "CAN_USE" + } +} \ No newline at end of file diff --git a/databricks-cluster-policy/outputs.tf b/databricks-cluster-policy/outputs.tf new file mode 100644 index 00000000..e69de29b diff --git a/databricks-cluster-policy/variables.tf b/databricks-cluster-policy/variables.tf new file mode 100644 index 00000000..10d6fbc6 --- /dev/null +++ b/databricks-cluster-policy/variables.tf @@ -0,0 +1,32 @@ +variable "policy_name" { + description = "Name of cluster policy" + type = string +} + +variable "databricks_workspace_id" { + description = "Databricks workspace_id for tagging" + type = string +} + +variable "databricks_host" { + description = "Databricks host name for tagging" + type = string +} + +variable "policy_family_id" { + description = "ID of policy family to inherit from" + type = string + default = null +} + +variable "policy_overrides" { + description = "Cluster policy overrides" + type = any + default = {} +} + +variable "grantees" { + description = "Names of groups to be granted use access to the policy - must already exist" + type = list(string) + default = [] +} \ No newline at end of file diff --git a/databricks-cluster-policy/versions.tf b/databricks-cluster-policy/versions.tf new file mode 100644 index 00000000..9cd525d6 --- /dev/null +++ b/databricks-cluster-policy/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + databricks = { + source = "databricks/databricks" + } + } + required_version = ">= 0.13" +} From 4c70f295cefb5013590e6533b6ae6e09efc52a0c Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Tue, 31 Oct 2023 13:13:16 -0700 Subject: [PATCH 07/19] feat: CDI-2182 Add databricks-default-cluster-policy module (#531) * add databricks-cluster-policy * Add default cluster policies module --- databricks-default-cluster-policies/README.md | 48 +++ databricks-default-cluster-policies/main.tf | 380 ++++++++++++++++++ .../outputs.tf | 0 .../variables.tf | 26 ++ .../versions.tf | 8 + 5 files changed, 462 insertions(+) create mode 100644 databricks-default-cluster-policies/README.md create mode 100644 databricks-default-cluster-policies/main.tf create mode 100644 databricks-default-cluster-policies/outputs.tf create mode 100644 databricks-default-cluster-policies/variables.tf create mode 100644 databricks-default-cluster-policies/versions.tf diff --git a/databricks-default-cluster-policies/README.md b/databricks-default-cluster-policies/README.md new file mode 100644 index 00000000..2b8c1bfe --- /dev/null +++ b/databricks-default-cluster-policies/README.md @@ -0,0 +1,48 @@ + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.13 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | n/a | +| [databricks](#provider\_databricks) | n/a | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [job\_compute\_cluster\_policy](#module\_job\_compute\_cluster\_policy) | ../databricks-cluster-policy | n/a | +| [large\_gpu\_large\_clusters\_cluster\_policy](#module\_large\_gpu\_large\_clusters\_cluster\_policy) | ../databricks-cluster-policy | n/a | +| 
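For reference, a minimal sketch of how the module added above might be consumed. This is illustrative only, not part of the patch: the host, workspace ID, policy values, and group name are placeholders, and a configured `databricks` provider is assumed.

```hcl
# Hypothetical caller; all literal values below are placeholders.
module "example_cluster_policy" {
  source = "../databricks-cluster-policy"

  databricks_host         = "https://example.cloud.databricks.com" # placeholder
  databricks_workspace_id = "1234567890123456"                     # placeholder
  policy_name             = "Example Compute"

  # Inherit from a built-in policy family and override selected attributes.
  # Omitting policy_family_id (leaving it null) takes the custom-definition
  # path instead, per the count trick in main.tf above.
  policy_family_id = "personal-vm"
  policy_overrides = {
    "autotermination_minutes" : {
      "type" : "fixed",
      "value" : 60
    }
  }

  grantees = ["data-scientists"] # group must already exist
}
```

Passing `policy_family_id` instantiates `inherited_cluster_policy`; leaving it `null` instantiates `custom_cluster_policy`, so exactly one of the two resources exists per module call.
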
From 4c70f295cefb5013590e6533b6ae6e09efc52a0c Mon Sep 17 00:00:00 2001
From: Jason Ng
Date: Tue, 31 Oct 2023 13:13:16 -0700
Subject: [PATCH 07/19] feat: CDI-2182 Add databricks-default-cluster-policy module (#531)

* add databricks-cluster-policy

* Add default cluster policies module
---
 databricks-default-cluster-policies/README.md |  48 +++
 databricks-default-cluster-policies/main.tf   | 380 ++++++++++++++++++
 .../outputs.tf                                |   0
 .../variables.tf                              |  26 ++
 .../versions.tf                               |   8 +
 5 files changed, 462 insertions(+)
 create mode 100644 databricks-default-cluster-policies/README.md
 create mode 100644 databricks-default-cluster-policies/main.tf
 create mode 100644 databricks-default-cluster-policies/outputs.tf
 create mode 100644 databricks-default-cluster-policies/variables.tf
 create mode 100644 databricks-default-cluster-policies/versions.tf

diff --git a/databricks-default-cluster-policies/README.md b/databricks-default-cluster-policies/README.md
new file mode 100644
index 00000000..2b8c1bfe
--- /dev/null
+++ b/databricks-default-cluster-policies/README.md
@@ -0,0 +1,48 @@
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | >= 0.13 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| [aws](#provider\_aws) | n/a |
+| [databricks](#provider\_databricks) | n/a |
+
+## Modules
+
+| Name | Source | Version |
+|------|--------|---------|
+| [job\_compute\_cluster\_policy](#module\_job\_compute\_cluster\_policy) | ../databricks-cluster-policy | n/a |
+| [large\_gpu\_large\_clusters\_cluster\_policy](#module\_large\_gpu\_large\_clusters\_cluster\_policy) | ../databricks-cluster-policy | n/a |
+| [large\_gpu\_personal\_cluster\_policy](#module\_large\_gpu\_personal\_cluster\_policy) | ../databricks-cluster-policy | n/a |
+| [large\_gpu\_small\_clusters\_cluster\_policy](#module\_large\_gpu\_small\_clusters\_cluster\_policy) | ../databricks-cluster-policy | n/a |
+| [large\_personal\_compute\_cluster\_policy](#module\_large\_personal\_compute\_cluster\_policy) | ../databricks-cluster-policy | n/a |
+| [legacy\_shared\_compute\_cluster\_policy](#module\_legacy\_shared\_compute\_cluster\_policy) | ../databricks-cluster-policy | n/a |
+| [personal\_compute\_cluster\_policy](#module\_personal\_compute\_cluster\_policy) | ../databricks-cluster-policy | n/a |
+| [power\_user\_compute\_cluster\_policy](#module\_power\_user\_compute\_cluster\_policy) | ../databricks-cluster-policy | n/a |
+| [small\_clusters](#module\_small\_clusters) | ../databricks-cluster-policy | n/a |
+| [superset\_compute\_cluster\_policy](#module\_superset\_compute\_cluster\_policy) | ../databricks-cluster-policy | n/a |
+
+## Resources
+
+| Name | Type |
+|------|------|
+| [databricks_group.power_user_group](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/group) | resource |
+| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
+
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| [additional\_instance\_profile\_arns](#input\_additional\_instance\_profile\_arns) | Additional instance profiles to allow clusters to run on | `list(string)` | `[]` | no |
+| [databricks\_host](#input\_databricks\_host) | Databricks host name for tagging | `string` | n/a | yes |
+| [databricks\_workspace\_id](#input\_databricks\_workspace\_id) | Databricks workspace\_id for tagging | `string` | n/a | yes |
+
+## Outputs
+
+No outputs.
+
diff --git a/databricks-default-cluster-policies/main.tf b/databricks-default-cluster-policies/main.tf
new file mode 100644
index 00000000..355febb0
--- /dev/null
+++ b/databricks-default-cluster-policies/main.tf
@@ -0,0 +1,380 @@
+data "aws_caller_identity" "current" {
+  provider = aws
+}
+locals {
+  power_user_group_name = "Power Users"
+  all_users_group_name  = "users"
+
+  default_policy_family_ids = {
+    job_compute : "job-cluster",
+    legacy_shared_compute : "shared-data-science",
+    personal_compute : "personal-vm",
+    power_user_compute : "power-user",
+    shared_compute : "shared-compute",
+  }
+
+  default_cluster_instance_profile     = "cluster-log-cluster-instance-profile"
+  default_cluster_instance_profile_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:instance-profile/databricks/${local.default_cluster_instance_profile}"
+
+  logging_override = {
+    # set log destination - one prefix per workspace since cluster ids are only unique per workspace
+    "cluster_log_conf.path" : {
+      "type" : "fixed",
+      "value" : "s3://${var.databricks_log_bucket}/cluster-logs/${var.databricks_workspace_id}"
+    },
+
+    "cluster_log_conf.type" : {
+      "type" : "fixed",
+      "value" : "S3"
+    }
+
+    # require using an instance profile that has logging enabled
+    "aws_attributes.instance_profile_arn" : {
+      "type" : "allowlist",
+      "values" : concat(
+        [local.default_cluster_instance_profile_arn], var.additional_instance_profile_arns),
+      "defaultValue" : local.default_cluster_instance_profile_arn
+    }
+  }
+}
+
+resource "databricks_group" "power_user_group" {
+  display_name               = local.power_user_group_name
+  allow_cluster_create       = true
+  allow_instance_pool_create = false
+}
+
+## Modified Databricks defaults
+module "legacy_shared_compute_cluster_policy" {
+  source = "../databricks-cluster-policy"
+
+  databricks_host         = var.databricks_host
+  databricks_workspace_id = var.databricks_workspace_id
+  policy_name             = "${var.policy_name_prefix}Legacy Shared Compute"
+  policy_family_id        = local.default_policy_family_ids["legacy_shared_compute"]
+  policy_overrides = merge(local.logging_override, {
+    "data_security_mode" : {
+      "type" : "fixed",
+      "hidden" : true,
+      "value" : "USER_ISOLATION"
+    }
+  })
+}
+
+module "personal_compute_cluster_policy" {
+  source = "../databricks-cluster-policy"
+
+  databricks_host         = var.databricks_host
+  databricks_workspace_id = var.databricks_workspace_id
+  policy_name             = "${var.policy_name_prefix}Personal Compute"
+  policy_family_id        = local.default_policy_family_ids["personal_compute"]
+  policy_overrides = merge(local.logging_override, {
+    "autotermination_minutes" : {
+      "type" : "fixed",
+      "value" : 120
+    },
+    "driver_node_type_id" : {
+      "type" : "regex",
+      "pattern" : "([rcip]+[3-5]+[d]*\\.[0-1]{0,1}xlarge)",
+      "hidden" : false
+    },
+    "node_type_id" : {
+      "type" : "regex",
+      "pattern" : "([rcip]+[3-5]+[d]*\\.[0-1]{0,1}xlarge)",
+      "hidden" : false
+    },
+  })
+  grantees = [local.all_users_group_name]
+}
+
+module "large_personal_compute_cluster_policy" {
+  source = "../databricks-cluster-policy"
+
+  databricks_host         = var.databricks_host
+  databricks_workspace_id = var.databricks_workspace_id
+  policy_name             = "${var.policy_name_prefix}Large Personal Compute"
+  policy_family_id        = local.default_policy_family_ids["personal_compute"]
+  policy_overrides = merge(local.logging_override, {
+    "autotermination_minutes" : {
+      "type" : "fixed",
+      "value" : 120
+    },
+    "driver_node_type_id" : {
+      "type" : "regex",
+      "pattern" : "([rcip]+[3-6]+[di]*\\.[0-8]{0,1}xlarge)",
+      "hidden" : false
+    },
+    "node_type_id" : {
+      "type" : "regex",
+      "pattern" : "([rcip]+[3-6]+[di]*\\.[0-8]{0,1}xlarge)",
+      "hidden" : false
+    },
+  })
+}
+
+module "power_user_compute_cluster_policy" {
+  source = "../databricks-cluster-policy"
+
+  databricks_host         = var.databricks_host
+  databricks_workspace_id = var.databricks_workspace_id
+  policy_name             = "${var.policy_name_prefix}Power User Compute"
+  policy_family_id        = local.default_policy_family_ids["power_user_compute"]
+  policy_overrides = merge(local.logging_override, {
+    "autoscale.max_workers" : {
+      "type" : "range",
+      "defaultValue" : 10,
+      "maxValue" : 20
+    },
+    "autotermination_minutes" : {
+      "type" : "fixed",
+      "value" : 120
+    },
+    "driver_node_type_id" : {
+      "type" : "regex",
+      "pattern" : "([grci]+[3-5]+[dn]*\\.[0-8]{0,1}xlarge)",
+      "hidden" : false
+    },
+    "node_type_id" : {
+      "type" : "regex",
+      "pattern" : "([grci]+[3-5]+[dn]*\\.[0-8]{0,1}xlarge)",
+      "hidden" : false
+    },
+  })
+
+  grantees = [local.power_user_group_name]
+}
+module "job_compute_cluster_policy" {
+  source = "../databricks-cluster-policy"
+
+  databricks_host         = var.databricks_host
+  databricks_workspace_id = var.databricks_workspace_id
+  policy_name             = "${var.policy_name_prefix}Job Compute"
+  policy_family_id        = local.default_policy_family_ids["job_compute"]
+
+  policy_overrides = merge(local.logging_override, {
+    "spark_conf.spark.databricks.cluster.profile" : {
+      "type" : "unlimited",
+      "defaultValue" : "singleNode",
+      "hidden" : false
+    }
+  })
+
+  grantees = [local.power_user_group_name]
+}
+
+## Fully custom policies
+module "large_gpu_large_clusters_cluster_policy" {
+  source = "../databricks-cluster-policy"
+
+  databricks_host         = var.databricks_host
+  databricks_workspace_id = var.databricks_workspace_id
+  policy_name             = "${var.policy_name_prefix}Large GPU Large Clusters"
+  policy_family_id        = local.default_policy_family_ids["power_user_compute"]
+  policy_overrides = merge(local.logging_override, {
+    "autoscale.max_workers" : {
+      "type" : "range",
+      "minValue" : 1,
+      "maxValue" : 64,
+      "defaultValue" : 2
+    },
+    "autoscale.min_workers" : {
+      "type" : "range",
+      "minValue" : 1,
+      "maxValue" : 3,
+      "defaultValue" : 1
+    },
+    "autotermination_minutes" : {
+      "type" : "fixed",
+      "value" : 120
+    },
+    "node_type_id" : {
+      "type" : "allowlist",
+      "values" : [
+        "g4dn.xlarge",
+        "g4dn.2xlarge",
+        "g4dn.4xlarge",
+        "g4dn.8xlarge",
+        "g4dn.12xlarge",
+        "g4dn.16xlarge",
+        "g5.xlarge",
+        "g5.8xlarge",
+        "g5.4xlarge",
+        "g5.48xlarge",
+        "g5.2xlarge",
+        "g5.24xlarge",
+        "g5.16xlarge",
+        "g5.12xlarge"
+      ],
+      "defaultValue" : "g4dn.xlarge"
+    },
+  })
+}
+
+module "large_gpu_personal_cluster_policy" {
+  source = "../databricks-cluster-policy"
+
+  databricks_host         = var.databricks_host
+  databricks_workspace_id = var.databricks_workspace_id
+  policy_name             = "${var.policy_name_prefix}Large GPU Personal"
+  policy_family_id        = local.default_policy_family_ids["personal_compute"]
+  policy_overrides = merge(local.logging_override, {
+    "autotermination_minutes" : {
+      "type" : "fixed",
+      "value" : 120
+    }
+    "node_type_id" : {
+      "type" : "allowlist",
+      "values" : [
+        "g4dn.xlarge",
+        "g4dn.2xlarge",
+        "g4dn.4xlarge",
+        "g4dn.8xlarge",
+        "g4dn.12xlarge",
+        "g4dn.16xlarge",
+        "g5.xlarge",
+        "g5.8xlarge",
+        "g5.4xlarge",
+        "g5.48xlarge",
+        "g5.2xlarge",
+        "g5.24xlarge",
+        "g5.16xlarge",
+        "g5.12xlarge"
+      ],
+      "defaultValue" : "g4dn.xlarge"
+    },
+  })
+}
+
+module "large_gpu_small_clusters_cluster_policy" {
+  source = "../databricks-cluster-policy"
+
+  databricks_host         = var.databricks_host
+  databricks_workspace_id = var.databricks_workspace_id
+  policy_name             = "${var.policy_name_prefix}Large GPU Small Clusters"
+  policy_family_id        = local.default_policy_family_ids["power_user_compute"]
+  policy_overrides = merge(local.logging_override, {
+    "autoscale.max_workers" : {
+      "type" : "range",
+      "minValue" : 1,
+      "maxValue" : 3,
+      "defaultValue" : 2
+    },
+    "autoscale.min_workers" : {
+      "type" : "range",
+      "minValue" : 1,
+      "maxValue" : 3,
+      "defaultValue" : 1
+    },
+    "autotermination_minutes" : {
+      "type" : "fixed",
+      "value" : 120
+    },
+    "node_type_id" : {
+      "type" : "allowlist",
+      "values" : [
+        "g4dn.xlarge",
+        "g4dn.2xlarge",
+        "g4dn.4xlarge",
+        "g4dn.8xlarge",
+        "g4dn.12xlarge",
+        "g4dn.16xlarge",
+        "g5.xlarge",
+        "g5.8xlarge",
+        "g5.4xlarge",
+        "g5.48xlarge",
+        "g5.2xlarge",
+        "g5.24xlarge",
+        "g5.16xlarge",
+        "g5.12xlarge"
+      ],
+      "defaultValue" : "g4dn.xlarge"
+    },
+  })
+}
+
+module "small_clusters" {
+  source = "../databricks-cluster-policy"
+
+  databricks_host         = var.databricks_host
+  databricks_workspace_id = var.databricks_workspace_id
+  policy_name             = "${var.policy_name_prefix}Small Clusters"
+  policy_family_id        = local.default_policy_family_ids["personal_compute"]
+  policy_overrides = merge(local.logging_override, {
+    "autoscale.max_workers" : {
+      "type" : "range",
+      "maxValue" : 3,
+      "defaultValue" : 3
+    },
+    "autoscale.min_workers" : {
+      "type" : "fixed",
+      "value" : 1,
+      "hidden" : true
+    },
+    "autotermination_minutes" : {
+      "type" : "fixed",
+      "value" : 120,
+      "hidden" : false
+    },
+    "data_security_mode" : {
+      "type" : "whitelist",
+      "values" : [
+        "USER_ISOLATION",
+        "SINGLE_USER"
+      ]
+    },
+    "driver_node_type_id" : {
+      "type" : "regex",
+      "pattern" : "[rmci][3-5][rnad]*\\.[0-2]{0,1}xlarge",
+      "hidden" : false
+    },
+    "enable_elastic_disk" : {
+      "type" : "fixed",
+      "value" : false,
+      "hidden" : true
+    },
+    "instance_pool_id" : {
+      "type" : "forbidden",
+      "hidden" : true
+    },
+    "node_type_id" : {
+      "type" : "regex",
+      "pattern" : "[rmci][3-5][rnad]*\\.[0-2]{0,1}xlarge",
+      "hidden" : false
+    },
+  })
+}
+
+module "superset_compute_cluster_policy" {
+  source = "../databricks-cluster-policy"
+
+  databricks_host         = var.databricks_host
+  databricks_workspace_id = var.databricks_workspace_id
+  policy_name             = "${var.policy_name_prefix}Superset Compute"
+
+  policy_overrides = merge(local.logging_override, {
+    "autotermination_minutes" : {
+      "type" : "fixed",
+      "value" : 60
+    },
+    "driver_node_type_id" : {
+      "type" : "regex",
+      "pattern" : "([grci]+[3-5]+[dn]*\\.[0-2]{0,1}xlarge)",
+      "hidden" : false
+    },
+    "node_type_id" : {
+      "type" : "regex",
+      "pattern" : "([grci]+[3-5]+[dn]*\\.[0-2]{0,1}xlarge)",
+      "hidden" : false
+    },
+    "driver_instance_pool_id" : {
+      "type" : "unlimited",
+      "isOptional" : true,
+      "defaultValue" : "superset_pool"
+    },
+    "instance_pool_id" : {
+      "type" : "unlimited",
+      "isOptional" : true,
+      "defaultValue" : "superset_pool"
+    },
+  })
+}
diff --git a/databricks-default-cluster-policies/outputs.tf b/databricks-default-cluster-policies/outputs.tf
new file mode 100644
index 00000000..e69de29b
diff --git a/databricks-default-cluster-policies/variables.tf b/databricks-default-cluster-policies/variables.tf
new file mode 100644
index 00000000..02cedd71
--- /dev/null
+++ b/databricks-default-cluster-policies/variables.tf
@@ -0,0 +1,26 @@
+variable "databricks_workspace_id" {
+  description = "Databricks workspace_id for tagging"
+  type        = string
+}
+
+variable "databricks_host" {
+  description = "Databricks host name for tagging"
+  type        = string
+}
+
+variable "additional_instance_profile_arns" {
+  description = "Additional instance profiles to allow clusters to run on"
+  type        = list(string)
+  default     = []
+}
+
+variable "databricks_log_bucket" {
+  description = "Name of S3 bucket to store Databricks logs"
+  type        = string
+}
+
+variable "policy_name_prefix" {
+  description = "Prefix for policy names"
+  type        = string
+  default     = ""
+}
\ No newline at end of file
diff --git a/databricks-default-cluster-policies/versions.tf b/databricks-default-cluster-policies/versions.tf
new file mode 100644
index 00000000..2106c4d3
--- /dev/null
+++ b/databricks-default-cluster-policies/versions.tf
@@ -0,0 +1,8 @@
+terraform {
+  required_providers {
+    databricks = {
+      source = "databricks/databricks"
+    }
+  }
+  required_version = ">= 1.3.0"
+}
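One way the module above might be wired up, as a minimal sketch rather than a definitive configuration: the region, host, workspace ID, bucket, and ARN values are placeholders, and credentials for both providers are assumed to come from the environment.

```hcl
terraform {
  required_providers {
    databricks = {
      source = "databricks/databricks"
    }
    aws = {
      source = "hashicorp/aws"
    }
  }
}

provider "aws" {
  region = "us-west-2" # placeholder
}

provider "databricks" {
  host = "https://example.cloud.databricks.com" # placeholder; auth assumed via env vars
}

module "default_cluster_policies" {
  source = "../databricks-default-cluster-policies"

  databricks_host         = "https://example.cloud.databricks.com" # placeholder
  databricks_workspace_id = "1234567890123456"                     # placeholder
  databricks_log_bucket   = "example-databricks-cluster-logs"      # placeholder

  # Optional: extra instance profiles allowed alongside the logging-enabled default.
  additional_instance_profile_arns = [
    "arn:aws:iam::123456789012:instance-profile/databricks/extra-profile", # placeholder
  ]

  policy_name_prefix = "Example " # optional
}
```

The `aws` provider is needed because the module reads `aws_caller_identity` to build the default instance-profile ARN; the `databricks` provider must point at the workspace whose policies are being managed.
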
description = "Additional instance profiles to allow clusters to run on" + type = list(string) + default = [] +} + +variable "databricks_log_bucket" { + description = "Name of S3 bucket to store Databricks logs" + type = string +} + +variable "policy_name_prefix" { + description = "Prefix for policy names" + type = string + default = "" +} \ No newline at end of file diff --git a/databricks-default-cluster-policies/versions.tf b/databricks-default-cluster-policies/versions.tf new file mode 100644 index 00000000..2106c4d3 --- /dev/null +++ b/databricks-default-cluster-policies/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + databricks = { + source = "databricks/databricks" + } + } + required_version = ">= 1.3.0" +} From 7fef82aa47a9dcc5b9e897072406f080e4ddef1f Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Tue, 31 Oct 2023 13:53:11 -0700 Subject: [PATCH 08/19] fix: update readmes and trigger release (#534) --- databricks-cluster-log-permissions/README.md | 6 ++++++ databricks-default-cluster-policies/README.md | 2 ++ 2 files changed, 8 insertions(+) diff --git a/databricks-cluster-log-permissions/README.md b/databricks-cluster-log-permissions/README.md index 055a1808..45cbf8d1 100644 --- a/databricks-cluster-log-permissions/README.md +++ b/databricks-cluster-log-permissions/README.md @@ -50,6 +50,12 @@ No modules. | [bucket\_kms\_encryption\_key\_arn](#input\_bucket\_kms\_encryption\_key\_arn) | ARN for KMS key used to encrypt bucket for cluster logs | `string` | n/a | yes | | [env](#input\_env) | Environment name | `string` | n/a | yes | | [existing\_role\_names](#input\_existing\_role\_names) | List of other existing instance policy roles on the workspace for which to add cluster log write permissions | `list(string)` | `[]` | no | +| [databricks\_logs\_bucke\_name](#input\_databricks\_logs\_bucket\_name) | Name of the bucket to store cluster logs | `string` | n/a | yes | +| [global\_reader\_env](#input\_global\_reader\_env) | Name of env to grant global logs reader access to | `string` | n/a | yes | +| [destination\_account\_id](#input\_destination\_account\_id) | Account ID for the logs destination AWS account | `string` | n/a | yes | +| [destination\_account\_region](#input\_destination\_account\_region) | Region for the logs destination AWS account | `string` | n/a | yes | +| [destination\_account\_assume\_role\_name](#input\_destination\_account\_assume\_role_name) | Role name to assume in the logs destination AWS account | `string` | n/a | yes | + ## Outputs diff --git a/databricks-default-cluster-policies/README.md b/databricks-default-cluster-policies/README.md index 2b8c1bfe..e9af1435 100644 --- a/databricks-default-cluster-policies/README.md +++ b/databricks-default-cluster-policies/README.md @@ -41,6 +41,8 @@ | [additional\_instance\_profile\_arns](#input\_additional\_instance\_profile\_arns) | Additional instance profiles to allow clusters to run on | `list(string)` | `[]` | no | | [databricks\_host](#input\_databricks\_host) | Databricks host name for tagging | `string` | n/a | yes | | [databricks\_workspace\_id](#input\_databricks\_workspace\_id) | Databricks workspace\_id for tagging | `string` | n/a | yes | +| [databricks\_log\_bucket](#input\_databricks\_log\_bucket) | Name of S3 bucket to store Databricks logs | `string` | n/a | yes | +| [policy\_name\_prefix](#input\_policy\_name\_prefix) | Prefix for policy names | `string` | n/a | no | ## Outputs From 04da725feb8806c50260113a07ac596ea2121153 Mon Sep 17 00:00:00 2001 From: "czi-github-helper[bot]" 
<95879977+czi-github-helper[bot]@users.noreply.github.com> Date: Tue, 31 Oct 2023 20:56:13 +0000 Subject: [PATCH 09/19] chore(main): release 0.62.0 (#533) Co-authored-by: czi-github-helper[bot] <95879977+czi-github-helper[bot]@users.noreply.github.com> --- CHANGELOG.md | 13 +++++++++++++ version.txt | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 005dacb8..d9a9e6f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Changelog +## [0.62.0](https://github.com/chanzuckerberg/cztack/compare/v0.61.0...v0.62.0) (2023-10-31) + + +### Features + +* CDI-2182 Add databricks-default-cluster-policy module ([#531](https://github.com/chanzuckerberg/cztack/issues/531)) ([4c70f29](https://github.com/chanzuckerberg/cztack/commit/4c70f295cefb5013590e6533b6ae6e09efc52a0c)) +* CDI-2183 Add databricks-cluster-log-permissions module ([#532](https://github.com/chanzuckerberg/cztack/issues/532)) ([2e5974a](https://github.com/chanzuckerberg/cztack/commit/2e5974a61defa36d339a1a28ce7c90a17bd22685)) + + +### Bug Fixes + +* update readmes and trigger release ([#534](https://github.com/chanzuckerberg/cztack/issues/534)) ([7fef82a](https://github.com/chanzuckerberg/cztack/commit/7fef82aa47a9dcc5b9e897072406f080e4ddef1f)) + ## [0.61.0](https://github.com/chanzuckerberg/cztack/compare/v0.60.1...v0.61.0) (2023-10-30) diff --git a/version.txt b/version.txt index 0b094550..4d74f323 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.61.0 +0.62.0 From a21509bda6d4bbeb81aaa2afc5fb9bd19f4f86f8 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Tue, 31 Oct 2023 16:35:29 -0700 Subject: [PATCH 10/19] fix: remove unused databricks-workspace-e2 variable (#535) --- databricks-workspace-e2/variables.tf | 5 ----- 1 file changed, 5 deletions(-) diff --git a/databricks-workspace-e2/variables.tf b/databricks-workspace-e2/variables.tf index 7eb3f9d2..d74bfb5c 100644 --- a/databricks-workspace-e2/variables.tf +++ b/databricks-workspace-e2/variables.tf @@ -51,11 +51,6 @@ variable "object_ownership" { } } -variable "audit_log_bucket_name" { - type = string - description = "Name of bucket to write cluster logs to - also where the audit logs go, too" -} - variable "workspace_name_override" { type = string default = null From 51cd7980278c1263032f9a3f0fd4f265f4804f65 Mon Sep 17 00:00:00 2001 From: "czi-github-helper[bot]" <95879977+czi-github-helper[bot]@users.noreply.github.com> Date: Tue, 31 Oct 2023 20:01:57 -0700 Subject: [PATCH 11/19] chore(main): release 0.62.1 (#536) Co-authored-by: czi-github-helper[bot] <95879977+czi-github-helper[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ version.txt | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9a9e6f6..606219bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.62.1](https://github.com/chanzuckerberg/cztack/compare/v0.62.0...v0.62.1) (2023-10-31) + + +### Bug Fixes + +* remove unused databricks-workspace-e2 variable ([#535](https://github.com/chanzuckerberg/cztack/issues/535)) ([a21509b](https://github.com/chanzuckerberg/cztack/commit/a21509bda6d4bbeb81aaa2afc5fb9bd19f4f86f8)) + ## [0.62.0](https://github.com/chanzuckerberg/cztack/compare/v0.61.0...v0.62.0) (2023-10-31) diff --git a/version.txt b/version.txt index 4d74f323..e2050de8 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.62.0 +0.62.1 From 770b19e544cca18a6f6e7f3f59800e84f16c1393 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Thu, 2 Nov 2023 18:39:12 -0700 Subject: 
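The README rows added in PATCH 08 document the full input surface of databricks-cluster-log-permissions. For reference, a hypothetical call filling those inputs in; the git-style source address is an assumption about how consumers would pin a release, and every value is a placeholder (provider configuration is assumed to be handled by the caller).

```hcl
# Illustrative only: all values below are placeholders.
module "cluster_log_permissions" {
  source = "github.com/chanzuckerberg/cztack//databricks-cluster-log-permissions?ref=v0.62.0" # assumed source form

  env                           = "staging"                            # placeholder
  databricks_logs_bucket_name   = "example-databricks-cluster-logs"    # placeholder
  bucket_kms_encryption_key_arn = "arn:aws:kms:us-west-2:123456789012:key/00000000-0000-0000-0000-000000000000" # placeholder
  global_reader_env             = "prod"                               # placeholder

  destination_account_id               = "123456789012"          # placeholder
  destination_account_region           = "us-west-2"             # placeholder
  destination_account_assume_role_name = "logs-destination-role" # placeholder

  # Optional inputs, shown with their defaults.
  add_reader          = false
  existing_role_names = []
}
```
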
From 770b19e544cca18a6f6e7f3f59800e84f16c1393 Mon Sep 17 00:00:00 2001
From: Jason Ng
Date: Thu, 2 Nov 2023 18:39:12 -0700
Subject: [PATCH 12/19] fix: Split out job compute policy between single and multi node (#537)

---
 databricks-default-cluster-policies/main.tf | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/databricks-default-cluster-policies/main.tf b/databricks-default-cluster-policies/main.tf
index 355febb0..c32ef4b7 100644
--- a/databricks-default-cluster-policies/main.tf
+++ b/databricks-default-cluster-policies/main.tf
@@ -151,6 +151,19 @@ module "job_compute_cluster_policy" {
   policy_name             = "${var.policy_name_prefix}Job Compute"
   policy_family_id        = local.default_policy_family_ids["job_compute"]
 
+  policy_overrides = local.logging_override
+
+  grantees = [local.power_user_group_name]
+}
+
+module "job_compute_cluster_policy" {
+  source = "../databricks-cluster-policy"
+
+  databricks_host         = var.databricks_host
+  databricks_workspace_id = var.databricks_workspace_id
+  policy_name             = "${var.policy_name_prefix}Small Job Compute"
+  policy_family_id        = local.default_policy_family_ids["job_compute"]
+
   policy_overrides = merge(local.logging_override, {
     "spark_conf.spark.databricks.cluster.profile" : {
       "type" : "unlimited",
From 1b4ff1c1cdbc6dde75e0e8575ed8511f664a4989 Mon Sep 17 00:00:00 2001
From: "czi-github-helper[bot]" <95879977+czi-github-helper[bot]@users.noreply.github.com>
Date: Fri, 3 Nov 2023 01:41:58 +0000
Subject: [PATCH 13/19] chore(main): release 0.62.2 (#538)

Co-authored-by: czi-github-helper[bot] <95879977+czi-github-helper[bot]@users.noreply.github.com>
---
 CHANGELOG.md | 7 +++++++
 version.txt  | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 606219bd..db8955f6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## [0.62.2](https://github.com/chanzuckerberg/cztack/compare/v0.62.1...v0.62.2) (2023-11-03)
+
+
+### Bug Fixes
+
+* Split out job compute policy between single and multi node ([#537](https://github.com/chanzuckerberg/cztack/issues/537)) ([770b19e](https://github.com/chanzuckerberg/cztack/commit/770b19e544cca18a6f6e7f3f59800e84f16c1393))
+
 ## [0.62.1](https://github.com/chanzuckerberg/cztack/compare/v0.62.0...v0.62.1) (2023-10-31)
 
 
diff --git a/version.txt b/version.txt
index e2050de8..92c648bd 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.62.1
+0.62.2
From 42b328dd1edf200e9672ecd48dba743c0b053500 Mon Sep 17 00:00:00 2001
From: Jason Ng
Date: Thu, 2 Nov 2023 18:45:53 -0700
Subject: [PATCH 14/19] fix: module name fix

---
 databricks-default-cluster-policies/main.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/databricks-default-cluster-policies/main.tf b/databricks-default-cluster-policies/main.tf
index c32ef4b7..a1e3ec6c 100644
--- a/databricks-default-cluster-policies/main.tf
+++ b/databricks-default-cluster-policies/main.tf
@@ -156,7 +156,7 @@ module "job_compute_cluster_policy" {
   grantees = [local.power_user_group_name]
 }
 
-module "job_compute_cluster_policy" {
+module "small_job_compute_cluster_policy" {
   source = "../databricks-cluster-policy"
 
   databricks_host         = var.databricks_host
From 3e7c30d4b1bbf0dfa64127d25760ada20afa23ee Mon Sep 17 00:00:00 2001
From: "czi-github-helper[bot]" <95879977+czi-github-helper[bot]@users.noreply.github.com>
Date: Fri, 3 Nov 2023 01:48:50 +0000
Subject: [PATCH 15/19] chore(main): release 0.62.3 (#539)

Co-authored-by: czi-github-helper[bot] <95879977+czi-github-helper[bot]@users.noreply.github.com>
---
 CHANGELOG.md | 7 +++++++
 version.txt  | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index db8955f6..0de11a79 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## [0.62.3](https://github.com/chanzuckerberg/cztack/compare/v0.62.2...v0.62.3) (2023-11-03)
+
+
+### Bug Fixes
+
+* module name fix ([42b328d](https://github.com/chanzuckerberg/cztack/commit/42b328dd1edf200e9672ecd48dba743c0b053500))
+
 ## [0.62.2](https://github.com/chanzuckerberg/cztack/compare/v0.62.1...v0.62.2) (2023-11-03)
 
 
diff --git a/version.txt b/version.txt
index 92c648bd..eb324e6f 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.62.2
+0.62.3
From 9d4cd22daedfcfa9e5f125f6650ec547bcd35e4c Mon Sep 17 00:00:00 2001
From: Alden Golab
Date: Wed, 15 Nov 2023 12:35:35 -0800
Subject: [PATCH 16/19] feat: add pool use to personal compute Databricks policy (#542)

---
 databricks-default-cluster-policies/main.tf | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/databricks-default-cluster-policies/main.tf b/databricks-default-cluster-policies/main.tf
index a1e3ec6c..a2a03b37 100644
--- a/databricks-default-cluster-policies/main.tf
+++ b/databricks-default-cluster-policies/main.tf
@@ -83,6 +83,10 @@ module "personal_compute_cluster_policy" {
       "pattern" : "([rcip]+[3-5]+[d]*\\.[0-1]{0,1}xlarge)",
       "hidden" : false
     },
+    "instance_pool_id" : {
+      type : "allowlist",
+      values : ["i3-xlarge-pool"]
+    }
   })
   grantees = [local.all_users_group_name]
 }
From 074b18cf85bcac1c7a1bfb6d147ba8c0f63f4bcc Mon Sep 17 00:00:00 2001
From: "czi-github-helper[bot]" <95879977+czi-github-helper[bot]@users.noreply.github.com>
Date: Wed, 15 Nov 2023 20:38:20 +0000
Subject: [PATCH 17/19] chore(main): release 0.63.0 (#541)

Co-authored-by: czi-github-helper[bot] <95879977+czi-github-helper[bot]@users.noreply.github.com>
---
 CHANGELOG.md | 7 +++++++
 version.txt  | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0de11a79..ae7e186a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## [0.63.0](https://github.com/chanzuckerberg/cztack/compare/v0.62.3...v0.63.0) (2023-11-15)
+
+
+### Features
+
+* add pool use to personal compute Databricks policy ([#542](https://github.com/chanzuckerberg/cztack/issues/542)) ([9d4cd22](https://github.com/chanzuckerberg/cztack/commit/9d4cd22daedfcfa9e5f125f6650ec547bcd35e4c))
+
 ## [0.62.3](https://github.com/chanzuckerberg/cztack/compare/v0.62.2...v0.62.3) (2023-11-03)
 
 
diff --git a/version.txt b/version.txt
index eb324e6f..70cd2261 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.62.3
+0.63.0
From 541f8d393351d9492e55cbaa452ff0187106fed0 Mon Sep 17 00:00:00 2001
From: Alden Golab
Date: Thu, 16 Nov 2023 10:55:21 -0800
Subject: [PATCH 18/19] fix: personal instance pools var for databricks compute policies (#543)

---
 databricks-default-cluster-policies/main.tf | 31 ++++++++++++++---
 .../variables.tf                            |  8 ++++-
 2 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/databricks-default-cluster-policies/main.tf b/databricks-default-cluster-policies/main.tf
index a2a03b37..5cb5defd 100644
--- a/databricks-default-cluster-policies/main.tf
+++ b/databricks-default-cluster-policies/main.tf
@@ -36,6 +36,18 @@ locals {
       "defaultValue" : local.default_cluster_instance_profile_arn
     }
   }
+  personal_instance_pools = var.personal_compute_pool_ids != [] ? {
+    "instance_pool_id" : {
+      "type" : "allowlist",
+      "isOptional" : true,
+      "values" : var.personal_compute_pool_ids
+    },
+    "driver_node_type_id" : {
+      "type" : "allowlist",
+      "isOptional" : true,
+      "values" : var.personal_compute_pool_ids
+    }
+  } : {}
 }
 
 resource "databricks_group" "power_user_group" {
@@ -68,7 +80,7 @@ module "personal_compute_cluster_policy" {
   databricks_workspace_id = var.databricks_workspace_id
   policy_name             = "${var.policy_name_prefix}Personal Compute"
   policy_family_id        = local.default_policy_family_ids["personal_compute"]
-  policy_overrides = merge(local.logging_override, {
+  policy_overrides = merge(local.logging_override, local.personal_instance_pools, {
     "autotermination_minutes" : {
       "type" : "fixed",
       "value" : 120
@@ -83,10 +95,19 @@ module "personal_compute_cluster_policy" {
       "pattern" : "([rcip]+[3-5]+[d]*\\.[0-1]{0,1}xlarge)",
       "hidden" : false
     },
-    "instance_pool_id" : {
-      type : "allowlist",
-      values : ["i3-xlarge-pool"]
-    }
+    "aws_attributes.availability" : {
+      "type" : "allowlist",
+      "values" : [
+        "ON_DEMAND",
+        "SPOT_WITH_FALLBACK"
+      ],
+      "hidden" : false
+    },
+    "runtime_engine" : {
+      "type" : "unlimited",
+      "defaultValue" : "STANDARD",
+      "hidden" : false
+    },
   })
   grantees = [local.all_users_group_name]
 }
diff --git a/databricks-default-cluster-policies/variables.tf b/databricks-default-cluster-policies/variables.tf
index 02cedd71..ab099f8f 100644
--- a/databricks-default-cluster-policies/variables.tf
+++ b/databricks-default-cluster-policies/variables.tf
@@ -23,4 +23,10 @@ variable "policy_name_prefix" {
   description = "Prefix for policy names"
   type        = string
   default     = ""
-}
\ No newline at end of file
+}
+
+variable "personal_compute_pool_ids" {
+  description = "List of personal compute pool ids allowed"
+  type        = list(string)
+  default     = []
+}
From 343028d602cb5453e8a1ea3c08f00ec41068841e Mon Sep 17 00:00:00 2001
From: "czi-github-helper[bot]" <95879977+czi-github-helper[bot]@users.noreply.github.com>
Date: Thu, 16 Nov 2023 11:00:31 -0800
Subject: [PATCH 19/19] chore(main): release 0.63.1 (#544)

Co-authored-by: czi-github-helper[bot] <95879977+czi-github-helper[bot]@users.noreply.github.com>
---
 CHANGELOG.md | 7 +++++++
 version.txt  | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ae7e186a..24154a4a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## [0.63.1](https://github.com/chanzuckerberg/cztack/compare/v0.63.0...v0.63.1) (2023-11-16)
+
+
+### Bug Fixes
+
+* personal instance pools var for databricks compute policies ([#543](https://github.com/chanzuckerberg/cztack/issues/543)) ([541f8d3](https://github.com/chanzuckerberg/cztack/commit/541f8d393351d9492e55cbaa452ff0187106fed0))
+
 ## [0.63.0](https://github.com/chanzuckerberg/cztack/compare/v0.62.3...v0.63.0) (2023-11-15)
 
 
diff --git a/version.txt b/version.txt
index 70cd2261..630f2e0c 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.63.0
+0.63.1
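With the full series applied, a caller can opt the Personal Compute policy into instance pools through the variable introduced in PATCH 18. A minimal sketch, reusing the hypothetical call from earlier; the pool ID and other values are placeholders.

```hcl
module "default_cluster_policies" {
  source = "../databricks-default-cluster-policies"

  databricks_host         = "https://example.cloud.databricks.com" # placeholder
  databricks_workspace_id = "1234567890123456"                     # placeholder
  databricks_log_bucket   = "example-databricks-cluster-logs"      # placeholder

  # New in v0.63.1: pools the Personal Compute policy may allowlist.
  # Leaving the default [] is intended to skip the pool overrides entirely,
  # per the personal_instance_pools conditional above.
  personal_compute_pool_ids = ["0101-120000-pool12-pool-abcdefgh"] # placeholder pool ID
}
```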