Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: CDI-3452 - Fix references for dbx volumes to allow creating volume on existing catalog and bucket #656

Merged
merged 5 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions databricks-s3-volume/bucket.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ locals {
data "aws_iam_policy_document" "databricks-s3" {
count = var.volume_bucket != null ? 0 : 1

override_policy_documents = var.override_policy_documents

# standard UC access
statement {
sid = "dbxBucketAccess"
Expand Down
16 changes: 9 additions & 7 deletions databricks-s3-volume/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,17 @@ locals {

path = "/databricks/"
databricks_aws_account = "414351767826" # Databricks' own AWS account, not CZI's. See https://docs.databricks.com/en/administration-guide/account-settings-e2/credentials.html#step-1-create-a-cross-account-iam-role
bucket_name = var.volume_bucket != null ? var.volume_bucket : replace(var.catalog_name, "_", "-") # buckets don't work with underscores
bucket_name = var.volume_bucket != null ? var.volume_bucket : (
var.override_bucket_name != null ? var.override_bucket_name : replace(var.catalog_name, "_", "-") # buckets don't work with underscores
)
}

### Databricks storage credential - allows workspace to access an external location.
### NOTE: names need to be unique across an account, not just a workspace
### NOTE:

resource "databricks_storage_credential" "volume" {
count = var.create_catalog ? 1 : 0
count = var.create_storage_credential ? 1 : 0

depends_on = [
resource.aws_iam_role.dbx_unity_aws_role,
Expand All @@ -42,7 +44,7 @@ resource "time_sleep" "wait_30_seconds" {
}

resource "databricks_external_location" "volume" {
count = var.create_catalog ? 1 : 0
count = var.create_storage_credential ? 1 : 0
depends_on = [time_sleep.wait_30_seconds]

name = local.catalog_name
Expand All @@ -59,7 +61,7 @@ resource "databricks_catalog" "volume" {
depends_on = [databricks_external_location.volume[0]]
name = local.catalog_name
metastore_id = var.metastore_id
owner = var.catalog_owner
owner = var.owner
storage_root = "s3://${local.bucket_name}"
comment = "this catalog is managed by terraform - default volume catalog for Databricks workspace ${var.workspace_name}"
properties = {
Expand All @@ -75,7 +77,7 @@ resource "databricks_schema" "volume" {
catalog_name = local.catalog_name
name = local.schema_name
comment = "This schema is managed by Terraform - ${var.volume_comment}"
owner = var.catalog_owner
owner = var.owner
properties = var.volume_schema_properties
}

Expand All @@ -85,7 +87,7 @@ resource "databricks_volume" "volume" {
catalog_name = local.catalog_name
schema_name = local.schema_name
volume_type = "EXTERNAL"
storage_location = "s3://${local.bucket_name}/${local.schema_name}"
owner = var.catalog_owner
storage_location = "s3://${local.bucket_name}/${local.schema_name}/${local.volume_name}"
owner = var.owner
comment = "This volume is managed by Terraform - ${var.volume_comment}"
}
12 changes: 12 additions & 0 deletions databricks-s3-volume/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,16 @@ output "volume_specific_bucket_name" {

# Dot-separated identifier of the volume, assembled from the catalog,
# schema and volume name locals of this module.
output "volume_path" {
  description = "Three-part name of the volume in the form <catalog_name>.<schema_name>.<volume_name>"
  value       = "${local.catalog_name}.${local.schema_name}.${local.volume_name}"
}

# Exposes local.catalog_name, the same value the schema and volume
# resources in this module use for their catalog_name argument.
output "catalog_name" {
  description = "Name of the catalog that the volume's schema and the volume belong to"
  value       = local.catalog_name
}

# Exposes local.schema_name, the name given to the schema resource and
# referenced by the volume resource in this module.
output "schema_name" {
  description = "Name of the schema that contains the volume"
  value       = local.schema_name
}

# Exposes local.volume_name, which is also the last component of the
# volume_path output.
output "volume_name" {
  description = "Name of the volume (the final component of volume_path)"
  value       = local.volume_name
}
22 changes: 20 additions & 2 deletions databricks-s3-volume/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ variable "catalog_name" {
type = string
}

variable "catalog_owner" {
description = "User or group name of the catalog owner"
variable "owner" {
description = "User or group name of the owner - will be applied to the catalog, schema, and volume, if applicable"
type = string
}

Expand Down Expand Up @@ -112,6 +112,24 @@ variable "additional_rw_bucket_grant_arns" {
default = []
}

# Forwarded unchanged to the `override_policy_documents` argument of the
# "databricks-s3" aws_iam_policy_document data source. That data source is
# only instantiated when var.volume_bucket is null, so this input has no
# effect there when an existing volume bucket is supplied.
variable "override_policy_documents" {
  type        = list(string)
  default     = []
  description = "(Optional) Additional bucket policies to apply to the bucket. These should already be in JSON"
}

# Drives the `count` of both the databricks_storage_credential "volume" and
# the databricks_external_location "volume" resources: true creates one of
# each, false creates neither.
# NOTE(review): the description mentions looking up an existing credential;
# confirm where that lookup happens, it is not part of the two resources above.
variable "create_storage_credential" {
  type        = bool
  default     = true
  description = "(Optional) Flag to create a new Databricks storage credential or look for an existing one for the given bucket_name"
}

# Second choice when local.bucket_name is resolved: var.volume_bucket wins if
# set, then this value, and only if both are null is the name derived from
# var.catalog_name with "_" replaced by "-".
# Unlike that derived default, the override is used as given; underscores are
# not rewritten, so callers must pass a name that is already valid.
variable "override_bucket_name" {
  type        = string
  default     = null
  description = "(Optional) Name of the S3 bucket to create or use for Databricks volume, overriding the default"
}

variable "tags" {
description = "REQUIRED: Tags to include for this environment."
type = object({
Expand Down
Loading