Skip to content

Commit

Permalink
Add validation for files with a .(resource-name).yml extension (#1780)
Browse files Browse the repository at this point in the history
## Changes
We want to encourage a pattern of specifying only a single resource in a
YAML file when the `.(resource-type).yml` extension is used (for
example, `.job.yml`). This convention could allow us to bijectively map
a resource YAML file to its corresponding resource in the Databricks
workspace.

This PR:
1. Emits a recommendation diagnostic when we detect this convention is
being violated. We can promote this to a warning when we want to
encourage this pattern more strongly.
2. Visualises the recommendation diagnostics in the `bundle validate`
command.

**NOTE:** While this PR also shows the recommendation for `.yaml` files,
we do not encourage users to use this extension. We only support it here
since it's part of the YAML standard and some existing users might
already be using `.yaml`.

## Tests
Unit tests and manually. Here's what an example output looks like:
```
Recommendation: define a single job in a file with the .job.yml extension.
  at resources.jobs.bar
     resources.jobs.foo
  in foo.job.yml:13:7
     foo.job.yml:5:7

The following resources are defined or configured in this file:
  - bar (job)
  - foo (job)
```

---------

Co-authored-by: Lennart Kats (databricks) <[email protected]>
  • Loading branch information
shreyas-goenka and lennartkats-db authored Oct 7, 2024
1 parent a8cff48 commit bca9c2e
Show file tree
Hide file tree
Showing 22 changed files with 721 additions and 8 deletions.
2 changes: 1 addition & 1 deletion bundle/config/loader/entry_point_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func TestEntryPointNoRootPath(t *testing.T) {

func TestEntryPoint(t *testing.T) {
b := &bundle.Bundle{
BundleRootPath: "testdata",
BundleRootPath: "testdata/basic",
}
diags := bundle.Apply(context.Background(), b, loader.EntryPoint())
require.NoError(t, diags.Error())
Expand Down
130 changes: 130 additions & 0 deletions bundle/config/loader/process_include.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,135 @@ package loader
import (
"context"
"fmt"
"slices"
"sort"
"strings"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)

func validateFileFormat(configRoot dyn.Value, filePath string) diag.Diagnostics {
for _, resourceDescription := range config.SupportedResources() {
singularName := resourceDescription.SingularName

for _, yamlExt := range []string{"yml", "yaml"} {
ext := fmt.Sprintf(".%s.%s", singularName, yamlExt)
if strings.HasSuffix(filePath, ext) {
return validateSingleResourceDefined(configRoot, ext, singularName)
}
}
}

return nil
}

func validateSingleResourceDefined(configRoot dyn.Value, ext, typ string) diag.Diagnostics {
type resource struct {
path dyn.Path
value dyn.Value
typ string
key string
}

resources := []resource{}
supportedResources := config.SupportedResources()

// Gather all resources defined in the resources block.
_, err := dyn.MapByPattern(
configRoot,
dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()),
func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
// The key for the resource, e.g. "my_job" for jobs.my_job.
k := p[2].Key()
// The type of the resource, e.g. "job" for jobs.my_job.
typ := supportedResources[p[1].Key()].SingularName

resources = append(resources, resource{path: p, value: v, typ: typ, key: k})
return v, nil
})
if err != nil {
return diag.FromErr(err)
}

// Gather all resources defined in a target block.
_, err = dyn.MapByPattern(
configRoot,
dyn.NewPattern(dyn.Key("targets"), dyn.AnyKey(), dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()),
func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
// The key for the resource, e.g. "my_job" for jobs.my_job.
k := p[4].Key()
// The type of the resource, e.g. "job" for jobs.my_job.
typ := supportedResources[p[3].Key()].SingularName

resources = append(resources, resource{path: p, value: v, typ: typ, key: k})
return v, nil
})
if err != nil {
return diag.FromErr(err)
}

typeMatch := true
seenKeys := map[string]struct{}{}
for _, rr := range resources {
// case: The resource is not of the correct type.
if rr.typ != typ {
typeMatch = false
break
}

seenKeys[rr.key] = struct{}{}
}

// Format matches. There's at most one resource defined in the file.
// The resource is also of the correct type.
if typeMatch && len(seenKeys) <= 1 {
return nil
}

detail := strings.Builder{}
detail.WriteString("The following resources are defined or configured in this file:\n")
lines := []string{}
for _, r := range resources {
lines = append(lines, fmt.Sprintf(" - %s (%s)\n", r.key, r.typ))
}
// Sort the lines to print to make the output deterministic.
sort.Strings(lines)
// Compact the lines before writing them to the message to remove any duplicate lines.
// This is needed because we do not dedup earlier when gathering the resources
// and it's valid to define the same resource in both the resources and targets block.
lines = slices.Compact(lines)
for _, l := range lines {
detail.WriteString(l)
}

locations := []dyn.Location{}
paths := []dyn.Path{}
for _, rr := range resources {
locations = append(locations, rr.value.Locations()...)
paths = append(paths, rr.path)
}
// Sort the locations and paths to make the output deterministic.
sort.Slice(locations, func(i, j int) bool {
return locations[i].String() < locations[j].String()
})
sort.Slice(paths, func(i, j int) bool {
return paths[i].String() < paths[j].String()
})

return diag.Diagnostics{
{
Severity: diag.Recommendation,
Summary: fmt.Sprintf("define a single %s in a file with the %s extension.", strings.ReplaceAll(typ, "_", " "), ext),
Detail: detail.String(),
Locations: locations,
Paths: paths,
},
}
}

type processInclude struct {
fullPath string
relPath string
Expand All @@ -31,6 +154,13 @@ func (m *processInclude) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnos
if diags.HasError() {
return diags
}

// Add any diagnostics associated with the file format.
diags = append(diags, validateFileFormat(this.Value(), m.relPath)...)
if diags.HasError() {
return diags
}

err := b.Config.Merge(this)
if err != nil {
diags = diags.Extend(diag.FromErr(err))
Expand Down
185 changes: 184 additions & 1 deletion bundle/config/loader/process_include_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ import (
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/loader"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestProcessInclude(t *testing.T) {
b := &bundle.Bundle{
BundleRootPath: "testdata",
BundleRootPath: "testdata/basic",
Config: config.Root{
Workspace: config.Workspace{
Host: "foo",
Expand All @@ -33,3 +35,184 @@ func TestProcessInclude(t *testing.T) {
require.NoError(t, diags.Error())
assert.Equal(t, "bar", b.Config.Workspace.Host)
}

func TestProcessIncludeFormatMatch(t *testing.T) {
for _, fileName := range []string{
"one_job.job.yml",
"one_pipeline.pipeline.yaml",
"two_job.yml",
"job_and_pipeline.yml",
"multiple_resources.yml",
} {
t.Run(fileName, func(t *testing.T) {
b := &bundle.Bundle{
BundleRootPath: "testdata/format_match",
Config: config.Root{
Bundle: config.Bundle{
Name: "format_test",
},
},
}

m := loader.ProcessInclude(filepath.Join(b.BundleRootPath, fileName), fileName)
diags := bundle.Apply(context.Background(), b, m)
assert.Empty(t, diags)
})
}
}

func TestProcessIncludeFormatNotMatch(t *testing.T) {
for fileName, expectedDiags := range map[string]diag.Diagnostics{
"single_job.pipeline.yaml": {
{
Severity: diag.Recommendation,
Summary: "define a single pipeline in a file with the .pipeline.yaml extension.",
Detail: "The following resources are defined or configured in this file:\n - job1 (job)\n",
Locations: []dyn.Location{
{File: filepath.FromSlash("testdata/format_not_match/single_job.pipeline.yaml"), Line: 11, Column: 11},
{File: filepath.FromSlash("testdata/format_not_match/single_job.pipeline.yaml"), Line: 4, Column: 7},
},
Paths: []dyn.Path{
dyn.MustPathFromString("resources.jobs.job1"),
dyn.MustPathFromString("targets.target1.resources.jobs.job1"),
},
},
},
"job_and_pipeline.job.yml": {
{
Severity: diag.Recommendation,
Summary: "define a single job in a file with the .job.yml extension.",
Detail: "The following resources are defined or configured in this file:\n - job1 (job)\n - pipeline1 (pipeline)\n",
Locations: []dyn.Location{
{File: filepath.FromSlash("testdata/format_not_match/job_and_pipeline.job.yml"), Line: 11, Column: 11},
{File: filepath.FromSlash("testdata/format_not_match/job_and_pipeline.job.yml"), Line: 4, Column: 7},
},
Paths: []dyn.Path{
dyn.MustPathFromString("resources.pipelines.pipeline1"),
dyn.MustPathFromString("targets.target1.resources.jobs.job1"),
},
},
},
"job_and_pipeline.experiment.yml": {
{
Severity: diag.Recommendation,
Summary: "define a single experiment in a file with the .experiment.yml extension.",
Detail: "The following resources are defined or configured in this file:\n - job1 (job)\n - pipeline1 (pipeline)\n",
Locations: []dyn.Location{
{File: filepath.FromSlash("testdata/format_not_match/job_and_pipeline.experiment.yml"), Line: 11, Column: 11},
{File: filepath.FromSlash("testdata/format_not_match/job_and_pipeline.experiment.yml"), Line: 4, Column: 7},
},
Paths: []dyn.Path{
dyn.MustPathFromString("resources.pipelines.pipeline1"),
dyn.MustPathFromString("targets.target1.resources.jobs.job1"),
},
},
},
"two_jobs.job.yml": {
{
Severity: diag.Recommendation,
Summary: "define a single job in a file with the .job.yml extension.",
Detail: "The following resources are defined or configured in this file:\n - job1 (job)\n - job2 (job)\n",
Locations: []dyn.Location{
{File: filepath.FromSlash("testdata/format_not_match/two_jobs.job.yml"), Line: 4, Column: 7},
{File: filepath.FromSlash("testdata/format_not_match/two_jobs.job.yml"), Line: 7, Column: 7},
},
Paths: []dyn.Path{
dyn.MustPathFromString("resources.jobs.job1"),
dyn.MustPathFromString("resources.jobs.job2"),
},
},
},
"second_job_in_target.job.yml": {
{
Severity: diag.Recommendation,
Summary: "define a single job in a file with the .job.yml extension.",
Detail: "The following resources are defined or configured in this file:\n - job1 (job)\n - job2 (job)\n",
Locations: []dyn.Location{
{File: filepath.FromSlash("testdata/format_not_match/second_job_in_target.job.yml"), Line: 11, Column: 11},
{File: filepath.FromSlash("testdata/format_not_match/second_job_in_target.job.yml"), Line: 4, Column: 7},
},
Paths: []dyn.Path{
dyn.MustPathFromString("resources.jobs.job1"),
dyn.MustPathFromString("targets.target1.resources.jobs.job2"),
},
},
},
"two_jobs_in_target.job.yml": {
{
Severity: diag.Recommendation,
Summary: "define a single job in a file with the .job.yml extension.",
Detail: "The following resources are defined or configured in this file:\n - job1 (job)\n - job2 (job)\n",
Locations: []dyn.Location{
{File: filepath.FromSlash("testdata/format_not_match/two_jobs_in_target.job.yml"), Line: 6, Column: 11},
{File: filepath.FromSlash("testdata/format_not_match/two_jobs_in_target.job.yml"), Line: 8, Column: 11},
},
Paths: []dyn.Path{
dyn.MustPathFromString("targets.target1.resources.jobs.job1"),
dyn.MustPathFromString("targets.target1.resources.jobs.job2"),
},
},
},
"multiple_resources.model_serving_endpoint.yml": {
{
Severity: diag.Recommendation,
Summary: "define a single model serving endpoint in a file with the .model_serving_endpoint.yml extension.",
Detail: `The following resources are defined or configured in this file:
- experiment1 (experiment)
- job1 (job)
- job2 (job)
- job3 (job)
- model1 (model)
- model_serving_endpoint1 (model_serving_endpoint)
- pipeline1 (pipeline)
- pipeline2 (pipeline)
- quality_monitor1 (quality_monitor)
- registered_model1 (registered_model)
- schema1 (schema)
`,
Locations: []dyn.Location{
{File: filepath.FromSlash("testdata/format_not_match/multiple_resources.model_serving_endpoint.yml"), Line: 12, Column: 7},
{File: filepath.FromSlash("testdata/format_not_match/multiple_resources.model_serving_endpoint.yml"), Line: 14, Column: 7},
{File: filepath.FromSlash("testdata/format_not_match/multiple_resources.model_serving_endpoint.yml"), Line: 18, Column: 7},
{File: filepath.FromSlash("testdata/format_not_match/multiple_resources.model_serving_endpoint.yml"), Line: 22, Column: 7},
{File: filepath.FromSlash("testdata/format_not_match/multiple_resources.model_serving_endpoint.yml"), Line: 24, Column: 7},
{File: filepath.FromSlash("testdata/format_not_match/multiple_resources.model_serving_endpoint.yml"), Line: 28, Column: 7},
{File: filepath.FromSlash("testdata/format_not_match/multiple_resources.model_serving_endpoint.yml"), Line: 35, Column: 11},
{File: filepath.FromSlash("testdata/format_not_match/multiple_resources.model_serving_endpoint.yml"), Line: 39, Column: 11},
{File: filepath.FromSlash("testdata/format_not_match/multiple_resources.model_serving_endpoint.yml"), Line: 43, Column: 11},
{File: filepath.FromSlash("testdata/format_not_match/multiple_resources.model_serving_endpoint.yml"), Line: 4, Column: 7},
{File: filepath.FromSlash("testdata/format_not_match/multiple_resources.model_serving_endpoint.yml"), Line: 8, Column: 7},
},
Paths: []dyn.Path{
dyn.MustPathFromString("resources.experiments.experiment1"),
dyn.MustPathFromString("resources.jobs.job1"),
dyn.MustPathFromString("resources.jobs.job2"),
dyn.MustPathFromString("resources.model_serving_endpoints.model_serving_endpoint1"),
dyn.MustPathFromString("resources.models.model1"),
dyn.MustPathFromString("resources.pipelines.pipeline1"),
dyn.MustPathFromString("resources.pipelines.pipeline2"),
dyn.MustPathFromString("resources.schemas.schema1"),
dyn.MustPathFromString("targets.target1.resources.jobs.job3"),
dyn.MustPathFromString("targets.target1.resources.quality_monitors.quality_monitor1"),
dyn.MustPathFromString("targets.target1.resources.registered_models.registered_model1"),
},
},
},
} {
t.Run(fileName, func(t *testing.T) {
b := &bundle.Bundle{
BundleRootPath: "testdata/format_not_match",
Config: config.Root{
Bundle: config.Bundle{
Name: "format_test",
},
},
}

m := loader.ProcessInclude(filepath.Join(b.BundleRootPath, fileName), fileName)
diags := bundle.Apply(context.Background(), b, m)
require.Len(t, diags, 1)
assert.Equal(t, expectedDiags, diags)
})
}
}
File renamed without changes.
11 changes: 11 additions & 0 deletions bundle/config/loader/testdata/format_match/job_and_pipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
resources:
pipelines:
pipeline1:
name: pipeline1

targets:
target1:
resources:
jobs:
job1:
name: job1
Loading

0 comments on commit bca9c2e

Please sign in to comment.