Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add acceptance tests for bundle deploy #2254

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion acceptance/acceptance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"io"
"os"
"os/exec"
pathlib "path"
"path/filepath"
"runtime"
"slices"
Expand Down Expand Up @@ -65,6 +66,34 @@ func TestInprocessMode(t *testing.T) {
require.Equal(t, 1, testAccept(t, true, "selftest"))
}

// Cache terraform binary in the temporary directory to avoid downloading it in each test.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, this is indeed a needed feature, both for local tests but most importantly for cloud tests (#2242).

Could you make a separate PR just for cached terraform - it's important that we review it well.

Also, could you not depend on side effect of "bundle validate"? Here some scripts/docs on how to prepare terraform.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've seen instructions for air-gapped environments. I'm not sure I will have enough capacity to do it correctly. I think we need to implement it by relying on caching in GitHub actions and adding a step where we download Terraform. The cache key should be based on Terraform and Terraform provider versions. Right now, we don't even do that in our own GitHub action that we provide.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think Github actions cache is nice to have, not a must.

I'm not sure I will have enough capacity to do it correctly.

Let's discuss on Slack -- I can help with that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rebase on top of this #2267

func downloadTerraform(execPath, tempHomeDir string) (string, error) {
tmpPath := pathlib.Join(tempHomeDir, "download-terraform")
err := os.Mkdir(tmpPath, 0o755)
if err != nil {
return "", fmt.Errorf("failed to create directory: %w", err)
}

err = os.WriteFile(
pathlib.Join(tmpPath, "databricks.yml"),
[]byte("{bundle: {name: 'download-terraform'}}"),
0o644,
)
if err != nil {
return "", fmt.Errorf("failed to write databricks.yml: %w", err)
}

// bundle validate will download terraform
cmd := exec.Command(execPath, "bundle", "validate")
cmd.Dir = tmpPath
err = cmd.Run()
if err != nil {
return "", fmt.Errorf("failed to run bundle validate: %w", err)
}

return pathlib.Join(tmpPath, ".databricks/bundle/default/bin/terraform"), nil
}

func testAccept(t *testing.T, InprocessMode bool, singleTest string) int {
repls := testdiff.ReplacementsContext{}
cwd, err := os.Getwd()
Expand Down Expand Up @@ -118,7 +147,12 @@ func testAccept(t *testing.T, InprocessMode bool, singleTest string) int {
t.Setenv(env.HomeEnvVar(), homeDir)

// Prevent CLI from downloading terraform in each test:
t.Setenv("DATABRICKS_TF_EXEC_PATH", tempHomeDir)
tfExecPath, err := downloadTerraform(execPath, tempHomeDir)
if err != nil {
t.Fatalf("Failed to download terraform: %v", err)
}

t.Setenv("DATABRICKS_TF_EXEC_PATH", tfExecPath)
}

workspaceClient, err := databricks.NewWorkspaceClient()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
bundle:
name: my_project

experimental:
python:
resources:
- "resources:load_resources"

targets:
dev:
mode: development
default: true
workspace:
host: $DATABRICKS_HOST
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Databricks notebook source
1 + 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from databricks.bundles.core import Bundle, Resources
from databricks.bundles.jobs import Job


def load_resources(bundle: Bundle) -> Resources:
resources = Resources()

my_job = Job.from_dict(
{
"name": "My Job",
"tasks": [
{
"task_key": "my_notebook",
"notebook_task": {
"notebook_path": "my_notebook.py",
},
},
],
}
)

resources.add_job("job1", my_job)

return resources
6 changes: 6 additions & 0 deletions acceptance/bundle/deploy/experimental-python/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

>>> uv run --quiet --python 3.12 --with databricks-bundles==0.7.0 -- $CLI bundle deploy
Uploading bundle files to /Workspace/Users/$USERNAME/.bundle/my_project/dev/files...
Deploying resources...
Updating deployment state...
Deployment complete!
10 changes: 10 additions & 0 deletions acceptance/bundle/deploy/experimental-python/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
cd my_project

envsubst < databricks.yml.tmpl > databricks.yml

trace uv run --quiet --python 3.12 --with databricks-bundles==0.7.0 -- $CLI bundle deploy

# FIXME: doesn't work because fake state is shared and tests run in parallel
# trace $CLI jobs list --output json

rm -rf .databricks __pycache__ databricks.yml .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
bundle:
name: my_project

resources:
jobs:
my_job:
name: My Job
tasks:
- task_key: my_notebook
notebook_task:
notebook_path: my_notebook.py

targets:
dev:
mode: development
default: true
workspace:
host: $DATABRICKS_HOST
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this here? isn't that the default?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean that databricks.yml doesn't have to declare targets if env variables are set? Note that we don't use "bundle init" here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean that $DATABRICK_HOST is already what's going to be used (that's how all other tests work), so no need to specify it explicitly and use .tmpl.

That line can just be deleted, it should work.

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Databricks notebook source
1 + 1
6 changes: 6 additions & 0 deletions acceptance/bundle/deploy/python-notebook/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

>>> $CLI bundle deploy
Uploading bundle files to /Workspace/Users/$USERNAME/.bundle/my_project/dev/files...
Deploying resources...
Updating deployment state...
Deployment complete!
10 changes: 10 additions & 0 deletions acceptance/bundle/deploy/python-notebook/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
cd my_project

envsubst < databricks.yml.tmpl > databricks.yml

trace $CLI bundle deploy

# FIXME: doesn't work because fake state is shared and tests run in parallel
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use Badness field in test.toml (git grep for examples).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should just fix the fake. It isn't hard

# trace $CLI jobs list --output json

rm -rf databricks.yml .gitignore .databricks
6 changes: 1 addition & 5 deletions acceptance/bundle/scripts/output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,7 @@ from myscript.py 0 postbuild: hello stderr!
Executing 'predeploy' script
from myscript.py 0 predeploy: hello stdout!
from myscript.py 0 predeploy: hello stderr!
Error: unable to deploy to /Workspace/Users/$USERNAME/.bundle/scripts/default/state as $USERNAME.
Please make sure the current user or one of their groups is listed under the permissions of this bundle.
For assistance, contact the owners of this project.
They may need to redeploy the bundle to apply the new permissions.
Please refer to https://docs.databricks.com/dev-tools/bundles/permissions.html for more on managing permissions.
Error: no active lock on target dir: file does not exist: /Workspace/Users/$USERNAME/.bundle/scripts/default/state/deploy.lock


Exit code: 1
4 changes: 2 additions & 2 deletions acceptance/bundle/variables/git-branch/output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"name": "git",
"target": "prod",
"terraform": {
"exec_path": "$TMPHOME"
"exec_path": "$TMPHOME/download-terraform/.databricks/bundle/default/bin/terraform"
}
},
"sync": {
Expand Down Expand Up @@ -60,7 +60,7 @@ Validation OK!
"name": "git",
"target": "dev",
"terraform": {
"exec_path": "$TMPHOME"
"exec_path": "$TMPHOME/download-terraform/.databricks/bundle/default/bin/terraform"
}
},
"sync": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"target": "dev",
"terraform": {
"exec_path": "$TMPHOME"
"exec_path": "$TMPHOME/download-terraform/.databricks/bundle/default/bin/terraform"
}
},
"resources": {
Expand Down
Loading
Loading