Always prepend bundle remote paths with /Workspace (#1724)
## Changes
Due to platform changes, all library, notebook, and similar paths used in
Databricks must start with either the /Workspace or /Volumes prefix.

This PR makes sure that all bundle paths are correctly prefixed (see the
sketch after the checklist below).

Note: this is a breaking change if a user previously configured and used a
`/Workspace/Workspace` folder in their workspace file system, or has a
`/Workspace/${workspace.root_path}...` pattern configured
anywhere in their bundle config.

Fixes: #1751

AI:
- [x] Scan DABs config and warn on
`/Workspace/${workspace.root_path}...` pattern usage
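
A minimal standalone sketch of the prefixing rule described above; the helper name and sample paths are illustrative and not part of the CLI:

```go
package main

import (
	"fmt"
	"strings"
)

// prependWorkspacePrefix mirrors the rule introduced by this PR: paths that
// already start with /Workspace/ or /Volumes/ are left untouched, everything
// else gets the /Workspace prefix prepended.
func prependWorkspacePrefix(path string) string {
	for _, prefix := range []string{"/Workspace/", "/Volumes/"} {
		if strings.HasPrefix(path, prefix) {
			return path
		}
	}
	return "/Workspace" + path
}

func main() {
	for _, p := range []string{
		"/Users/[email protected]/.bundle/test/dev", // prefixed
		"/Shared/test",                         // prefixed
		"/Workspace/Users/[email protected]",    // unchanged
		"/Volumes/catalog/schema/volume",       // unchanged
	} {
		fmt.Printf("%-40s -> %s\n", p, prependWorkspacePrefix(p))
	}
}
```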

## Tests
Added unit tests

---------

Co-authored-by: Pieter Noordhuis <[email protected]>
andrewnester and pietern authored Oct 2, 2024
1 parent 80d55f4 commit a8cff48
Showing 16 changed files with 327 additions and 17 deletions.
2 changes: 1 addition & 1 deletion bundle/config/mutator/expand_workspace_root.go
@@ -33,7 +33,7 @@ func (m *expandWorkspaceRoot) Apply(ctx context.Context, b *bundle.Bundle) diag.
}

if strings.HasPrefix(root, "~/") {
home := fmt.Sprintf("/Users/%s", currentUser.UserName)
home := fmt.Sprintf("/Workspace/Users/%s", currentUser.UserName)
b.Config.Workspace.RootPath = path.Join(home, root[2:])
}

2 changes: 1 addition & 1 deletion bundle/config/mutator/expand_workspace_root_test.go
@@ -27,7 +27,7 @@ func TestExpandWorkspaceRoot(t *testing.T) {
}
diags := bundle.Apply(context.Background(), b, mutator.ExpandWorkspaceRoot())
require.NoError(t, diags.Error())
assert.Equal(t, "/Users/[email protected]/foo", b.Config.Workspace.RootPath)
assert.Equal(t, "/Workspace/Users/[email protected]/foo", b.Config.Workspace.RootPath)
}

func TestExpandWorkspaceRootDoesNothing(t *testing.T) {
67 changes: 67 additions & 0 deletions bundle/config/mutator/prepend_workspace_prefix.go
@@ -0,0 +1,67 @@
package mutator

import (
"context"
"fmt"
"strings"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)

type prependWorkspacePrefix struct{}

// PrependWorkspacePrefix prepends the /Workspace prefix to the workspace root,
// file, artifact, and state paths, unless they already start with /Workspace or /Volumes.
func PrependWorkspacePrefix() bundle.Mutator {
return &prependWorkspacePrefix{}
}

func (m *prependWorkspacePrefix) Name() string {
return "PrependWorkspacePrefix"
}

var skipPrefixes = []string{
"/Workspace/",
"/Volumes/",
}

func (m *prependWorkspacePrefix) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
patterns := []dyn.Pattern{
dyn.NewPattern(dyn.Key("workspace"), dyn.Key("root_path")),
dyn.NewPattern(dyn.Key("workspace"), dyn.Key("file_path")),
dyn.NewPattern(dyn.Key("workspace"), dyn.Key("artifact_path")),
dyn.NewPattern(dyn.Key("workspace"), dyn.Key("state_path")),
}

err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
var err error
for _, pattern := range patterns {
v, err = dyn.MapByPattern(v, pattern, func(p dyn.Path, pv dyn.Value) (dyn.Value, error) {
path, ok := pv.AsString()
if !ok {
return dyn.InvalidValue, fmt.Errorf("expected string, got %s", v.Kind())
}

for _, prefix := range skipPrefixes {
if strings.HasPrefix(path, prefix) {
return pv, nil
}
}

return dyn.NewValue(fmt.Sprintf("/Workspace%s", path), v.Locations()), nil
})

if err != nil {
return dyn.InvalidValue, err
}
}
return v, nil
})

if err != nil {
return diag.FromErr(err)
}

return nil
}
79 changes: 79 additions & 0 deletions bundle/config/mutator/prepend_workspace_prefix_test.go
@@ -0,0 +1,79 @@
package mutator

import (
"context"
"testing"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/stretchr/testify/require"
)

func TestPrependWorkspacePrefix(t *testing.T) {
testCases := []struct {
path string
expected string
}{
{
path: "/Users/test",
expected: "/Workspace/Users/test",
},
{
path: "/Shared/test",
expected: "/Workspace/Shared/test",
},
{
path: "/Workspace/Users/test",
expected: "/Workspace/Users/test",
},
{
path: "/Volumes/Users/test",
expected: "/Volumes/Users/test",
},
}

for _, tc := range testCases {
b := &bundle.Bundle{
Config: config.Root{
Workspace: config.Workspace{
RootPath: tc.path,
ArtifactPath: tc.path,
FilePath: tc.path,
StatePath: tc.path,
},
},
}

diags := bundle.Apply(context.Background(), b, PrependWorkspacePrefix())
require.Empty(t, diags)
require.Equal(t, tc.expected, b.Config.Workspace.RootPath)
require.Equal(t, tc.expected, b.Config.Workspace.ArtifactPath)
require.Equal(t, tc.expected, b.Config.Workspace.FilePath)
require.Equal(t, tc.expected, b.Config.Workspace.StatePath)
}
}

func TestPrependWorkspaceForDefaultConfig(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Bundle: config.Bundle{
Name: "test",
Target: "dev",
},
Workspace: config.Workspace{
CurrentUser: &config.User{
User: &iam.User{
UserName: "[email protected]",
},
},
},
},
}
diags := bundle.Apply(context.Background(), b, bundle.Seq(DefineDefaultWorkspaceRoot(), ExpandWorkspaceRoot(), DefineDefaultWorkspacePaths(), PrependWorkspacePrefix()))
require.Empty(t, diags)
require.Equal(t, "/Workspace/Users/[email protected]/.bundle/test/dev", b.Config.Workspace.RootPath)
require.Equal(t, "/Workspace/Users/[email protected]/.bundle/test/dev/artifacts", b.Config.Workspace.ArtifactPath)
require.Equal(t, "/Workspace/Users/[email protected]/.bundle/test/dev/files", b.Config.Workspace.FilePath)
require.Equal(t, "/Workspace/Users/[email protected]/.bundle/test/dev/state", b.Config.Workspace.StatePath)
}
72 changes: 72 additions & 0 deletions bundle/config/mutator/rewrite_workspace_prefix.go
@@ -0,0 +1,72 @@
package mutator

import (
"context"
"fmt"
"strings"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)

type rewriteWorkspacePrefix struct{}

// RewriteWorkspacePrefix finds strings in the bundle configuration that combine
// the /Workspace prefix with a workspace path variable and removes the prefix from them.
func RewriteWorkspacePrefix() bundle.Mutator {
return &rewriteWorkspacePrefix{}
}

func (m *rewriteWorkspacePrefix) Name() string {
return "RewriteWorkspacePrefix"
}

func (m *rewriteWorkspacePrefix) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
diags := diag.Diagnostics{}
paths := map[string]string{
"/Workspace/${workspace.root_path}": "${workspace.root_path}",
"/Workspace${workspace.root_path}": "${workspace.root_path}",
"/Workspace/${workspace.file_path}": "${workspace.file_path}",
"/Workspace${workspace.file_path}": "${workspace.file_path}",
"/Workspace/${workspace.artifact_path}": "${workspace.artifact_path}",
"/Workspace${workspace.artifact_path}": "${workspace.artifact_path}",
"/Workspace/${workspace.state_path}": "${workspace.state_path}",
"/Workspace${workspace.state_path}": "${workspace.state_path}",
}

err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) {
// Walk through the bundle configuration, check all the string leafs and
// see if any of the prefixes are used in the remote path.
return dyn.Walk(root, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
vv, ok := v.AsString()
if !ok {
return v, nil
}

for path, replacePath := range paths {
if strings.Contains(vv, path) {
newPath := strings.Replace(vv, path, replacePath, 1)
diags = append(diags, diag.Diagnostic{
Severity: diag.Warning,
Summary: fmt.Sprintf("substring %q found in %q. Please update this to %q.", path, vv, newPath),
Detail: "For more information, please refer to: https://docs.databricks.com/en/release-notes/dev-tools/bundles.html#workspace-paths",
Locations: v.Locations(),
Paths: []dyn.Path{p},
})

// Remove the workspace prefix from the string.
return dyn.NewValue(newPath, v.Locations()), nil
}
}

return v, nil
})
})

if err != nil {
return diag.FromErr(err)
}

return diags
}
85 changes: 85 additions & 0 deletions bundle/config/mutator/rewrite_workspace_prefix_test.go
@@ -0,0 +1,85 @@
package mutator

import (
"context"
"testing"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/require"
)

func TestNoWorkspacePrefixUsed(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Workspace: config.Workspace{
RootPath: "/Workspace/Users/test",
ArtifactPath: "/Workspace/Users/test/artifacts",
FilePath: "/Workspace/Users/test/files",
StatePath: "/Workspace/Users/test/state",
},

Resources: config.Resources{
Jobs: map[string]*resources.Job{
"test_job": {
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
SparkPythonTask: &jobs.SparkPythonTask{
PythonFile: "/Workspace/${workspace.root_path}/file1.py",
},
},
{
NotebookTask: &jobs.NotebookTask{
NotebookPath: "/Workspace${workspace.file_path}/notebook1",
},
Libraries: []compute.Library{
{
Jar: "/Workspace/${workspace.artifact_path}/jar1.jar",
},
},
},
{
NotebookTask: &jobs.NotebookTask{
NotebookPath: "${workspace.file_path}/notebook2",
},
Libraries: []compute.Library{
{
Jar: "${workspace.artifact_path}/jar2.jar",
},
},
},
},
},
},
},
},
},
}

diags := bundle.Apply(context.Background(), b, RewriteWorkspacePrefix())
require.Len(t, diags, 3)

expectedErrors := map[string]bool{
`substring "/Workspace/${workspace.root_path}" found in "/Workspace/${workspace.root_path}/file1.py". Please update this to "${workspace.root_path}/file1.py".`: true,
`substring "/Workspace${workspace.file_path}" found in "/Workspace${workspace.file_path}/notebook1". Please update this to "${workspace.file_path}/notebook1".`: true,
`substring "/Workspace/${workspace.artifact_path}" found in "/Workspace/${workspace.artifact_path}/jar1.jar". Please update this to "${workspace.artifact_path}/jar1.jar".`: true,
}

for _, d := range diags {
require.Equal(t, d.Severity, diag.Warning)
require.Contains(t, expectedErrors, d.Summary)
delete(expectedErrors, d.Summary)
}

require.Equal(t, "${workspace.root_path}/file1.py", b.Config.Resources.Jobs["test_job"].JobSettings.Tasks[0].SparkPythonTask.PythonFile)
require.Equal(t, "${workspace.file_path}/notebook1", b.Config.Resources.Jobs["test_job"].JobSettings.Tasks[1].NotebookTask.NotebookPath)
require.Equal(t, "${workspace.artifact_path}/jar1.jar", b.Config.Resources.Jobs["test_job"].JobSettings.Tasks[1].Libraries[0].Jar)
require.Equal(t, "${workspace.file_path}/notebook2", b.Config.Resources.Jobs["test_job"].JobSettings.Tasks[2].NotebookTask.NotebookPath)
require.Equal(t, "${workspace.artifact_path}/jar2.jar", b.Config.Resources.Jobs["test_job"].JobSettings.Tasks[2].Libraries[0].Jar)

}
2 changes: 1 addition & 1 deletion bundle/config/workspace.go
@@ -47,7 +47,7 @@ type Workspace struct {

// Remote workspace base path for deployment state, for artifacts, as synchronization target.
// This defaults to "~/.bundle/${bundle.name}/${bundle.target}" where "~" expands to
// the current user's home directory in the workspace (e.g. `/Users/[email protected]`).
// the current user's home directory in the workspace (e.g. `/Workspace/Users/[email protected]`).
RootPath string `json:"root_path,omitempty"`

// Remote workspace path to synchronize local files to.
7 changes: 7 additions & 0 deletions bundle/phases/initialize.go
@@ -39,9 +39,16 @@ func Initialize() bundle.Mutator {
mutator.MergePipelineClusters(),
mutator.InitializeWorkspaceClient(),
mutator.PopulateCurrentUser(),

mutator.DefineDefaultWorkspaceRoot(),
mutator.ExpandWorkspaceRoot(),
mutator.DefineDefaultWorkspacePaths(),
mutator.PrependWorkspacePrefix(),

// This mutator needs to be run before variable interpolation because it
// searches for strings with variable references in them.
mutator.RewriteWorkspacePrefix(),

mutator.SetVariables(),
// Intentionally placed before ResolveVariableReferencesInLookup, ResolveResourceReferences,
// ResolveVariableReferencesInComplexVariables and ResolveVariableReferences.
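
A stdlib-only sketch of why `RewriteWorkspacePrefix` must run before variable interpolation: once `${workspace.root_path}` has been resolved, the redundant prefix can no longer be detected by its variable reference, and the deployed path silently ends up doubled. The values below are hypothetical:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical resolved value of ${workspace.root_path} after this PR.
	rootPath := "/Workspace/Users/[email protected]/.bundle/test/dev"

	// A user-configured path that still carries the now-redundant prefix.
	configured := "/Workspace${workspace.root_path}/file1.py"

	// Rewriting while the variable reference is still literal drops the
	// redundant prefix and leaves a clean reference behind.
	rewritten := strings.Replace(configured, "/Workspace${workspace.root_path}", "${workspace.root_path}", 1)
	fmt.Println(rewritten) // ${workspace.root_path}/file1.py

	// If interpolation ran first, the reference would already be gone and
	// the deployed path would contain a doubled prefix.
	interpolated := strings.Replace(configured, "${workspace.root_path}", rootPath, 1)
	fmt.Println(interpolated) // /Workspace/Workspace/Users/[email protected]/.bundle/test/dev/file1.py
}
```
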
2 changes: 1 addition & 1 deletion bundle/tests/pipeline_glob_paths_test.go
@@ -11,7 +11,7 @@ func TestExpandPipelineGlobPaths(t *testing.T) {
require.NoError(t, diags.Error())
require.Equal(
t,
"/Users/[email protected]/.bundle/pipeline_glob_paths/default/files/dlt/nyc_taxi_loader",
"/Workspace/Users/[email protected]/.bundle/pipeline_glob_paths/default/files/dlt/nyc_taxi_loader",
b.Config.Resources.Pipelines["nyc_taxi_pipeline"].Libraries[0].Notebook.Path,
)
}
8 changes: 4 additions & 4 deletions bundle/tests/relative_path_translation_test.go
@@ -12,17 +12,17 @@ func TestRelativePathTranslationDefault(t *testing.T) {
require.NoError(t, diags.Error())

t0 := b.Config.Resources.Jobs["job"].Tasks[0]
assert.Equal(t, "/remote/src/file1.py", t0.SparkPythonTask.PythonFile)
assert.Equal(t, "/Workspace/remote/src/file1.py", t0.SparkPythonTask.PythonFile)
t1 := b.Config.Resources.Jobs["job"].Tasks[1]
assert.Equal(t, "/remote/src/file1.py", t1.SparkPythonTask.PythonFile)
assert.Equal(t, "/Workspace/remote/src/file1.py", t1.SparkPythonTask.PythonFile)
}

func TestRelativePathTranslationOverride(t *testing.T) {
b, diags := initializeTarget(t, "./relative_path_translation", "override")
require.NoError(t, diags.Error())

t0 := b.Config.Resources.Jobs["job"].Tasks[0]
assert.Equal(t, "/remote/src/file2.py", t0.SparkPythonTask.PythonFile)
assert.Equal(t, "/Workspace/remote/src/file2.py", t0.SparkPythonTask.PythonFile)
t1 := b.Config.Resources.Jobs["job"].Tasks[1]
assert.Equal(t, "/remote/src/file2.py", t1.SparkPythonTask.PythonFile)
assert.Equal(t, "/Workspace/remote/src/file2.py", t1.SparkPythonTask.PythonFile)
}
2 changes: 1 addition & 1 deletion internal/bundle/deploy_test.go
@@ -236,7 +236,7 @@ func TestAccDeployBasicBundleLogs(t *testing.T) {

stdout, stderr := blackBoxRun(t, root, "bundle", "deploy")
assert.Equal(t, strings.Join([]string{
fmt.Sprintf("Uploading bundle files to /Users/%s/.bundle/%s/files...", currentUser.UserName, uniqueId),
fmt.Sprintf("Uploading bundle files to /Workspace/Users/%s/.bundle/%s/files...", currentUser.UserName, uniqueId),
"Deploying resources...",
"Updating deployment state...",
"Deployment complete!\n",
2 changes: 1 addition & 1 deletion internal/bundle/helpers.go
@@ -114,7 +114,7 @@ func getBundleRemoteRootPath(w *databricks.WorkspaceClient, t *testing.T, unique
// Compute root path for the bundle deployment
me, err := w.CurrentUser.Me(context.Background())
require.NoError(t, err)
root := fmt.Sprintf("/Users/%s/.bundle/%s", me.UserName, uniqueId)
root := fmt.Sprintf("/Workspace/Users/%s/.bundle/%s", me.UserName, uniqueId)
return root
}
