Skip to content

Commit

Permalink
Postpone directory creation until upload start
Browse files Browse the repository at this point in the history
This ensure that "bundle validate" does not create remote directory.

Extend empty_bundle_test.go to check if remote path is created prematurely
  • Loading branch information
denik committed Dec 10, 2024
1 parent f3c628e commit 8c90966
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 26 deletions.
24 changes: 22 additions & 2 deletions internal/bundle/empty_bundle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,38 @@ import (

"github.com/databricks/cli/internal/acc"
"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestAccEmptyBundleDeploy(t *testing.T) {
ctx, _ := acc.WorkspaceTest(t)
ctx, w := acc.WorkspaceTest(t)

uniqueId := uuid.New().String()
me, err := w.W.CurrentUser.Me(ctx)
require.NoError(t, err)
remoteRoot := fmt.Sprintf("/Workspace/Users/%s/.bundle/%s", me.UserName, uniqueId)

// create empty bundle
tmpDir := t.TempDir()
f, err := os.Create(filepath.Join(tmpDir, "databricks.yml"))
require.NoError(t, err)

bundleRoot := fmt.Sprintf(`bundle:
name: %s`, uuid.New().String())
name: %s`, uniqueId)
_, err = f.WriteString(bundleRoot)
require.NoError(t, err)
f.Close()

_, err = w.W.Workspace.GetStatusByPath(ctx, remoteRoot)
assert.ErrorContains(t, err, "doesn't exist")

mustValidateBundle(t, ctx, tmpDir)

// regression: "bundle validate" must not create a directory
_, err = w.W.Workspace.GetStatusByPath(ctx, remoteRoot)
require.ErrorContains(t, err, "doesn't exist")

// deploy empty bundle
err = deployBundle(t, ctx, tmpDir)
require.NoError(t, err)
Expand All @@ -33,4 +48,9 @@ func TestAccEmptyBundleDeploy(t *testing.T) {
err = destroyBundle(t, ctx, tmpDir)
require.NoError(t, err)
})

// verify that remoteRoot was actually relevant location to test
_, err = w.W.Workspace.GetStatusByPath(ctx, remoteRoot)
assert.NoError(t, err)

}
19 changes: 12 additions & 7 deletions internal/sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -509,13 +509,15 @@ func TestAccSyncEnsureRemotePathIsUsableIfRepoDoesntExist(t *testing.T) {

// Hypothetical repo path doesn't exist.
nonExistingRepoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("doesnt-exist-"))
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nonExistingRepoPath, nil)
remoteExists, err := sync.EnsureRemotePathIsUsable(ctx, wsc, nonExistingRepoPath, nil)
assert.ErrorContains(t, err, " does not exist; please create it first")
assert.False(t, remoteExists)

// Paths nested under a hypothetical repo path should yield the same error.
nestedPath := path.Join(nonExistingRepoPath, "nested/directory")
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil)
remoteExists, err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil)
assert.ErrorContains(t, err, " does not exist; please create it first")
assert.False(t, remoteExists)
}

func TestAccSyncEnsureRemotePathIsUsableIfRepoExists(t *testing.T) {
Expand All @@ -526,13 +528,15 @@ func TestAccSyncEnsureRemotePathIsUsableIfRepoExists(t *testing.T) {
_, remoteRepoPath := setupRepo(t, wsc, ctx)

// Repo itself is usable.
err := sync.EnsureRemotePathIsUsable(ctx, wsc, remoteRepoPath, nil)
remoteExists, err := sync.EnsureRemotePathIsUsable(ctx, wsc, remoteRepoPath, nil)
assert.NoError(t, err)
assert.True(t, remoteExists)

// Path nested under repo path is usable.
nestedPath := path.Join(remoteRepoPath, "nested/directory")
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil)
remoteExists, err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil)
assert.NoError(t, err)
assert.False(t, remoteExists)

// Verify that the directory has been created.
info, err := wsc.Workspace.GetStatusByPath(ctx, nestedPath)
Expand All @@ -549,8 +553,9 @@ func TestAccSyncEnsureRemotePathIsUsableInWorkspace(t *testing.T) {
require.NoError(t, err)

remotePath := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("ensure-path-exists-test-"))
err = sync.EnsureRemotePathIsUsable(ctx, wsc, remotePath, me)
remoteExists, err := sync.EnsureRemotePathIsUsable(ctx, wsc, remotePath, me)
assert.NoError(t, err)
assert.False(t, remoteExists)

// Clean up directory after test.
defer func() {
Expand All @@ -560,8 +565,8 @@ func TestAccSyncEnsureRemotePathIsUsableInWorkspace(t *testing.T) {
assert.NoError(t, err)
}()

// Verify that the directory has been created.
// Verify that the directory has not been created.
info, err := wsc.Workspace.GetStatusByPath(ctx, remotePath)
require.NoError(t, err)
require.ErrorContains(t, err, "not exist")
require.Equal(t, workspace.ObjectTypeDirectory, info.ObjectType)
}
39 changes: 23 additions & 16 deletions libs/sync/path.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,16 @@ func repoPathForPath(me *iam.User, remotePath string) string {

// EnsureRemotePathIsUsable checks if the specified path is nested under
// expected base paths and if it is a directory or repository.
func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string, me *iam.User) error {
// Returns (doesRemoteExist, error)
func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string, me *iam.User) (bool, error) {
var err error

// TODO: we should cache CurrentUser.Me at the SDK level
// for now we let clients pass in any existing user they might already have
if me == nil {
me, err = wsc.CurrentUser.Me(ctx)
if err != nil {
return err
return false, err
}
}

Expand All @@ -43,27 +44,20 @@ func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClie
if err != nil {
// We only deal with 404s below.
if !apierr.IsMissing(err) {
return err
return false, err
}

// If the path is nested under a repo, the repo has to exist.
if strings.HasPrefix(remotePath, "/Repos/") {
repoPath := repoPathForPath(me, remotePath)
_, err = wsc.Workspace.GetStatusByPath(ctx, repoPath)
if err != nil && apierr.IsMissing(err) {
return fmt.Errorf("%s does not exist; please create it first", repoPath)
return false, fmt.Errorf("%s does not exist; please create it first", repoPath)
}
}

// The workspace path doesn't exist. Create it and try again.
err = wsc.Workspace.MkdirsByPath(ctx, remotePath)
if err != nil {
return fmt.Errorf("unable to create directory at %s: %w", remotePath, err)
}
info, err = wsc.Workspace.GetStatusByPath(ctx, remotePath)
if err != nil {
return err
}
return false, nil

}

log.Debugf(
Expand All @@ -77,10 +71,23 @@ func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClie
// We expect the object at path to be a directory or a repo.
switch info.ObjectType {
case workspace.ObjectTypeDirectory:
return nil
return true, nil
case workspace.ObjectTypeRepo:
return nil
return true, nil
}

return fmt.Errorf("%s points to a %s", remotePath, strings.ToLower(info.ObjectType.String()))
return true, fmt.Errorf("%s points to a %s", remotePath, strings.ToLower(info.ObjectType.String()))
}

func createRemotePath(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string) error {
// The workspace path doesn't exist. Create it and try again.
err := wsc.Workspace.MkdirsByPath(ctx, remotePath)
if err != nil {
return fmt.Errorf("unable to create directory at %s: %w", remotePath, err)
}
_, err = wsc.Workspace.GetStatusByPath(ctx, remotePath)
if err != nil {
return err
}
return nil
}
14 changes: 13 additions & 1 deletion libs/sync/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ type Sync struct {
// WaitGroup is automatically created when an output handler is provided in the SyncOptions.
// Close call is required to ensure the output handler goroutine handles all events in time.
outputWaitGroup *stdsync.WaitGroup

// If this flag is not set, we'll create remote directory before starting upload
remoteExists bool
}

// New initializes and returns a new [Sync] instance.
Expand All @@ -84,7 +87,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
}

// Verify that the remote path we're about to synchronize to is valid and allowed.
err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath, opts.CurrentUser)
remoteExists, err := EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath, opts.CurrentUser)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -141,6 +144,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
notifier: notifier,
outputWaitGroup: outputWaitGroup,
seq: 0,
remoteExists: remoteExists,
}, nil
}

Expand Down Expand Up @@ -180,6 +184,14 @@ func (s *Sync) notifyComplete(ctx context.Context, d diff) {
// Returns the list of files tracked (and synchronized) by the syncer during the run,
// and an error if any occurred.
func (s *Sync) RunOnce(ctx context.Context) ([]fileset.File, error) {
if !s.remoteExists {
err := createRemotePath(ctx, s.WorkspaceClient, s.RemotePath)
if err != nil {
return nil, err
}
s.remoteExists = true
}

files, err := s.GetFileList(ctx)
if err != nil {
return files, err
Expand Down

0 comments on commit 8c90966

Please sign in to comment.