Skip to content

Commit

Permalink
Add validation mutator for volume artifact_path (#2050)
Browse files Browse the repository at this point in the history
## Changes
This PR:
1. Incrementally improves the error messages shown to the user when the
volume they are referring to in `workspace.artifact_path` does not
exist.
2. Performs this validation in both `bundle validate` and `bundle
deploy` compared to before on just deployments.
3. It runs "fast" validations on `bundle deploy`, which earlier were
only run on `bundle validate`.


## Tests
Unit tests and manually. Also, existing integration tests provide
coverage (`TestUploadArtifactToVolumeNotYetDeployed`,
`TestUploadArtifactFileToVolumeThatDoesNotExist`)

Examples:
```
.venv➜  bundle-playground git:(master) ✗ cli bundle validate
Error: cannot access volume capital.whatever.my_volume: User does not have READ VOLUME on Volume 'capital.whatever.my_volume'.
  at workspace.artifact_path
  in databricks.yml:7:18
```

and

```
.venv➜  bundle-playground git:(master) ✗ cli bundle validate
Error: volume capital.whatever.foobar does not exist
  at workspace.artifact_path
     resources.volumes.foo
  in databricks.yml:7:18
     databricks.yml:12:7

You are using a volume in your artifact_path that is managed by
this bundle but which has not been deployed yet. Please first deploy
the volume using 'bundle deploy' and then switch over to using it in
the artifact_path.
```
  • Loading branch information
shreyas-goenka authored Jan 2, 2025
1 parent 509f5ab commit 7beb0fb
Show file tree
Hide file tree
Showing 11 changed files with 444 additions and 398 deletions.
51 changes: 51 additions & 0 deletions bundle/config/validate/fast_validate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package validate

import (
"context"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
)

// FastValidate runs a subset of fast validation checks. This is a subset of the full
// suite of validation mutators that satisfy ANY ONE of the following criteria:
//
// 1. No file i/o or network requests are made in the mutator.
// 2. The validation is blocking for bundle deployments.
//
// The full suite of validation mutators is available in the [Validate] mutator.
type fastValidateReadonly struct{}

func FastValidateReadonly() bundle.ReadOnlyMutator {
return &fastValidateReadonly{}
}

func (f *fastValidateReadonly) Name() string {
return "fast_validate(readonly)"
}

func (f *fastValidateReadonly) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics {
return bundle.ApplyReadOnly(ctx, rb, bundle.Parallel(
// Fast mutators with only in-memory checks
JobClusterKeyDefined(),
JobTaskClusterSpec(),
SingleNodeCluster(),

// Blocking mutators. Deployments will fail if these checks fail.
ValidateArtifactPath(),
))
}

type fastValidate struct{}

func FastValidate() bundle.Mutator {
return &fastValidate{}
}

func (f *fastValidate) Name() string {
return "fast_validate"
}

func (f *fastValidate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
return bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), FastValidateReadonly())
}
9 changes: 5 additions & 4 deletions bundle/config/validate/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@ func (l location) Path() dyn.Path {
// Apply implements bundle.Mutator.
func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
return bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), bundle.Parallel(
JobClusterKeyDefined(),
FastValidateReadonly(),

// Slow mutators that require network or file i/o. These are only
// run in the `bundle validate` command.
FilesToSync(),
ValidateSyncPatterns(),
JobTaskClusterSpec(),
ValidateFolderPermissions(),
SingleNodeCluster(),
ValidateSyncPatterns(),
))
}

Expand Down
129 changes: 129 additions & 0 deletions bundle/config/validate/validate_artifact_path.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
package validate

import (
"context"
"errors"
"fmt"
"slices"
"strings"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/libraries"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/dyn/dynvar"
"github.com/databricks/databricks-sdk-go/apierr"
)

type validateArtifactPath struct{}

func ValidateArtifactPath() bundle.ReadOnlyMutator {
return &validateArtifactPath{}
}

func (v *validateArtifactPath) Name() string {
return "validate:artifact_paths"
}

func extractVolumeFromPath(artifactPath string) (string, string, string, error) {
if !libraries.IsVolumesPath(artifactPath) {
return "", "", "", fmt.Errorf("expected artifact_path to start with /Volumes/, got %s", artifactPath)
}

parts := strings.Split(artifactPath, "/")
volumeFormatErr := fmt.Errorf("expected UC volume path to be in the format /Volumes/<catalog>/<schema>/<volume>/..., got %s", artifactPath)

// Incorrect format.
if len(parts) < 5 {
return "", "", "", volumeFormatErr
}

catalogName := parts[2]
schemaName := parts[3]
volumeName := parts[4]

// Incorrect format.
if catalogName == "" || schemaName == "" || volumeName == "" {
return "", "", "", volumeFormatErr
}

return catalogName, schemaName, volumeName, nil
}

func findVolumeInBundle(r config.Root, catalogName, schemaName, volumeName string) (dyn.Path, []dyn.Location, bool) {
volumes := r.Resources.Volumes
for k, v := range volumes {
if v.CatalogName != catalogName || v.Name != volumeName {
continue
}
// UC schemas can be defined in the bundle itself, and thus might be interpolated
// at runtime via the ${resources.schemas.<name>} syntax. Thus we match the volume
// definition if the schema name is the same as the one in the bundle, or if the
// schema name is interpolated.
// We only have to check for ${resources.schemas...} references because any
// other valid reference (like ${var.foo}) would have been interpolated by this point.
p, ok := dynvar.PureReferenceToPath(v.SchemaName)
isSchemaDefinedInBundle := ok && p.HasPrefix(dyn.Path{dyn.Key("resources"), dyn.Key("schemas")})
if v.SchemaName != schemaName && !isSchemaDefinedInBundle {
continue
}
pathString := fmt.Sprintf("resources.volumes.%s", k)
return dyn.MustPathFromString(pathString), r.GetLocations(pathString), true
}
return nil, nil, false
}

func (v *validateArtifactPath) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics {
// We only validate UC Volumes paths right now.
if !libraries.IsVolumesPath(rb.Config().Workspace.ArtifactPath) {
return nil
}

wrapErrorMsg := func(s string) diag.Diagnostics {
return diag.Diagnostics{
{
Summary: s,
Severity: diag.Error,
Locations: rb.Config().GetLocations("workspace.artifact_path"),
Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path")},
},
}
}

catalogName, schemaName, volumeName, err := extractVolumeFromPath(rb.Config().Workspace.ArtifactPath)
if err != nil {
return wrapErrorMsg(err.Error())
}
volumeFullName := fmt.Sprintf("%s.%s.%s", catalogName, schemaName, volumeName)
w := rb.WorkspaceClient()
_, err = w.Volumes.ReadByName(ctx, volumeFullName)

if errors.Is(err, apierr.ErrPermissionDenied) {
return wrapErrorMsg(fmt.Sprintf("cannot access volume %s: %s", volumeFullName, err))
}
if errors.Is(err, apierr.ErrNotFound) {
path, locations, ok := findVolumeInBundle(rb.Config(), catalogName, schemaName, volumeName)
if !ok {
return wrapErrorMsg(fmt.Sprintf("volume %s does not exist", volumeFullName))
}

// If the volume is defined in the bundle, provide a more helpful error diagnostic,
// with more details and location information.
return diag.Diagnostics{{
Summary: fmt.Sprintf("volume %s does not exist", volumeFullName),
Severity: diag.Error,
Detail: `You are using a volume in your artifact_path that is managed by
this bundle but which has not been deployed yet. Please first deploy
the volume using 'bundle deploy' and then switch over to using it in
the artifact_path.`,
Locations: slices.Concat(rb.Config().GetLocations("workspace.artifact_path"), locations),
Paths: append([]dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, path),
}}

}
if err != nil {
return wrapErrorMsg(fmt.Sprintf("cannot read volume %s: %s", volumeFullName, err))
}
return nil
}
Loading

0 comments on commit 7beb0fb

Please sign in to comment.