Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add legacy option for run_as #1384

Merged
merged 11 commits into from
Apr 22, 2024
68 changes: 59 additions & 9 deletions bundle/config/mutator/run_as.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ package mutator
import (
"context"
"fmt"
"slices"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/databricks-sdk-go/service/jobs"
Expand Down Expand Up @@ -103,19 +105,12 @@ func validateRunAs(b *bundle.Bundle) error {
return nil
}

func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
// Mutator is a no-op if run_as is not specified in the bundle
func setRunAsForJobs(b *bundle.Bundle) {
runAs := b.Config.RunAs
if runAs == nil {
return nil
return
}

// Assert the run_as configuration is valid in the context of the bundle
if err := validateRunAs(b); err != nil {
return diag.FromErr(err)
}

// Set run_as for jobs
for i := range b.Config.Resources.Jobs {
job := b.Config.Resources.Jobs[i]
if job.RunAs != nil {
Expand All @@ -126,6 +121,61 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
UserName: runAs.UserName,
}
}
}

// legacySetRunAsForPipelines applies the legacy run_as behavior to pipelines:
// every pipeline in the bundle gets an IS_OWNER permission for the bundle-level
// run_as identity, replacing any permission entries that already reference
// that identity. It does nothing when run_as is not configured.
func legacySetRunAsForPipelines(b *bundle.Bundle) {
	identity := b.Config.RunAs
	if identity == nil {
		return
	}

	// When the user deploying the bundle is the run_as identity, no IS_OWNER
	// permission is added: the current user is implied to be the owner already.
	// Adding it anyway would fail due to this bug:
	// https://github.com/databricks/terraform-provider-databricks/issues/2407
	currentUser := b.Config.Workspace.CurrentUser.UserName
	if identity.UserName == currentUser || identity.ServicePrincipalName == currentUser {
		return
	}

	// refersToIdentity reports whether a permission entry names the run_as
	// identity. Empty identity fields never match.
	refersToIdentity := func(p resources.Permission) bool {
		if identity.ServicePrincipalName != "" && p.ServicePrincipalName == identity.ServicePrincipalName {
			return true
		}
		return identity.UserName != "" && p.UserName == identity.UserName
	}

	owner := resources.Permission{
		Level:                "IS_OWNER",
		ServicePrincipalName: identity.ServicePrincipalName,
		UserName:             identity.UserName,
	}

	for _, pipeline := range b.Config.Resources.Pipelines {
		// Drop existing entries for the identity first, then append the owner entry.
		remaining := slices.DeleteFunc(pipeline.Permissions, refersToIdentity)
		pipeline.Permissions = append(remaining, owner)
	}
}

// Apply propagates the bundle-level run_as configuration to the bundle's
// resources.
//
// It is a no-op when run_as is not specified. When run_as.use_legacy is set,
// the legacy pipeline handling and the job handling run without calling
// validateRunAs, and a single warning diagnostic is returned. Otherwise the
// configuration must pass validateRunAs before run_as is set on jobs; a
// validation failure is returned as an error diagnostic.
func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
	if b.Config.RunAs == nil {
		return nil
	}

	if !b.Config.RunAs.UseLegacy {
		// Assert the run_as configuration is valid in the context of the bundle
		// before touching any resource.
		if err := validateRunAs(b); err != nil {
			return diag.FromErr(err)
		}

		setRunAsForJobs(b)
		return nil
	}

	// Legacy mode: update pipelines and jobs, then warn about the flag.
	legacySetRunAsForPipelines(b)
	setRunAsForJobs(b)

	const legacyFlag = "run_as.use_legacy"
	return diag.Diagnostics{
		{
			Severity: diag.Warning,
			Summary:  "Using legacy mode of run_as. This mode changes the OWNER of a pipeline to the run_as identity. Changing the owner of a DLT pipeline requires the user deploying the bundle to be a workspace admin, and a metastore admin if the target is in UC.",
			Path:     dyn.MustPathFromString(legacyFlag),
			Location: b.Config.GetLocation(legacyFlag),
		},
	}
}
12 changes: 8 additions & 4 deletions bundle/config/mutator/run_as_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ func TestRunAsWorksForAllowedResources(t *testing.T) {
},
},
},
RunAs: &jobs.JobRunAs{
UserName: "bob",
RunAs: &config.RunAs{
JobRunAs: jobs.JobRunAs{
UserName: "bob",
},
},
Resources: config.Resources{
Jobs: map[string]*resources.Job{
Expand Down Expand Up @@ -145,8 +147,10 @@ func TestRunAsErrorForUnsupportedResources(t *testing.T) {
},
},
},
RunAs: &jobs.JobRunAs{
UserName: "bob",
RunAs: &config.RunAs{
JobRunAs: jobs.JobRunAs{
UserName: "bob",
},
},
}

Expand Down
3 changes: 1 addition & 2 deletions bundle/config/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"github.com/databricks/cli/libs/dyn/merge"
"github.com/databricks/cli/libs/dyn/yamlloader"
"github.com/databricks/cli/libs/log"
"github.com/databricks/databricks-sdk-go/service/jobs"
)

type Root struct {
Expand Down Expand Up @@ -58,7 +57,7 @@ type Root struct {
Sync Sync `json:"sync,omitempty"`

// RunAs section allows to define an execution identity for jobs and pipelines runs
RunAs *jobs.JobRunAs `json:"run_as,omitempty"`
RunAs *RunAs `json:"run_as,omitempty"`

Experimental *Experimental `json:"experimental,omitempty"`

Expand Down
17 changes: 17 additions & 0 deletions bundle/config/run_as.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package config

import "github.com/databricks/databricks-sdk-go/service/jobs"

type RunAs struct {
jobs.JobRunAs

// Enable legacy run_as behavior. That is:
// - Set the run_as identity as the owner of any pipelines in the bundle.
// - Do not error in the presence of resources that do not support run_as.
// As of April 2024 this includes pipelines and model serving endpoints.
//
// This mode of run_as requires the deploying user to be a workspace and metastore
// admin for working properly. Use of this flag is not recommend for new bundles,
// and it is only provided for backward compatibility.
UseLegacy bool `json:"use_legacy,omitempty"`
shreyas-goenka marked this conversation as resolved.
Show resolved Hide resolved
}
66 changes: 66 additions & 0 deletions bundle/tests/run_as/legacy/databricks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
bundle:
name: "run_as"

run_as:
service_principal_name: "my_service_principal"
use_legacy: true

resources:
jobs:
job_one:
name: Job One

tasks:
- task_key: "task_one"
notebook_task:
notebook_path: "./test.py"

job_two:
name: Job Two

tasks:
- task_key: "task_two"
notebook_task:
notebook_path: "./test.py"

job_three:
name: Job Three

run_as:
service_principal_name: "my_service_principal_for_job"

tasks:
- task_key: "task_three"
notebook_task:
notebook_path: "./test.py"

pipelines:
nyc_taxi_pipeline:
name: "nyc taxi loader"

permissions:
- level: CAN_VIEW
service_principal_name: my_service_principal
- level: CAN_VIEW
user_name: my_user_name

libraries:
- notebook:
path: ./dlt/nyc_taxi_loader


models:
model_one:
name: "skynet"

registered_models:
model_two:
name: "skynet (in UC)"

experiments:
experiment_one:
name: "experiment_one"

model_serving_endpoints:
model_serving_one:
name: "skynet"
51 changes: 51 additions & 0 deletions bundle/tests/run_as_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/databricks/databricks-sdk-go/service/catalog"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/ml"
"github.com/databricks/databricks-sdk-go/service/serving"
"github.com/stretchr/testify/assert"
)

Expand Down Expand Up @@ -233,3 +234,53 @@ func TestRunAsErrorNeitherUserOrSpSpecifiedAtTargetOverride(t *testing.T) {
configPath := filepath.FromSlash("run_as/not_allowed/neither_sp_nor_user_override/override.yml")
assert.EqualError(t, err, fmt.Sprintf("run_as section must specify exactly one identity. Neither service_principal_name nor user_name is specified at %s:4:12", configPath))
}

// TestLegacyRunAs loads the ./run_as/legacy bundle, applies the SetRunAs
// mutator and checks that jobs and pipelines pick up the run_as identity while
// the other resources in the bundle stay as configured.
func TestLegacyRunAs(t *testing.T) {
	b := load(t, "./run_as/legacy")
	ctx := context.Background()

	// Set the current user on the workspace configuration before running the mutator.
	bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
		b.Config.Workspace.CurrentUser = &config.User{
			User: &iam.User{
				UserName: "[email protected]",
			},
		}
		return nil
	})

	diags := bundle.Apply(ctx, b, mutator.SetRunAs())
	assert.NoError(t, diags.Error())

	jobsByKey := b.Config.Resources.Jobs
	assert.Len(t, jobsByKey, 3)

	// job_one and job_two should have the same run_as identity as the bundle;
	// job_three should retain its own run_as identity.
	expectations := []struct {
		key              string
		servicePrincipal string
	}{
		{key: "job_one", servicePrincipal: "my_service_principal"},
		{key: "job_two", servicePrincipal: "my_service_principal"},
		{key: "job_three", servicePrincipal: "my_service_principal_for_job"},
	}
	for _, want := range expectations {
		runAs := jobsByKey[want.key].RunAs
		assert.NotNil(t, runAs)
		assert.Equal(t, want.servicePrincipal, runAs.ServicePrincipalName)
		assert.Equal(t, "", runAs.UserName)
	}

	// Assert owner permissions for pipelines are set.
	permissions := b.Config.Resources.Pipelines["nyc_taxi_pipeline"].Permissions
	assert.Len(t, permissions, 2)

	viewer, owner := permissions[0], permissions[1]
	assert.Equal(t, "CAN_VIEW", viewer.Level)
	assert.Equal(t, "my_user_name", viewer.UserName)

	assert.Equal(t, "IS_OWNER", owner.Level)
	assert.Equal(t, "my_service_principal", owner.ServicePrincipalName)

	// Assert other resources are not affected.
	res := b.Config.Resources
	assert.Equal(t, ml.Model{Name: "skynet"}, *res.Models["model_one"].Model)
	assert.Equal(t, catalog.CreateRegisteredModelRequest{Name: "skynet (in UC)"}, *res.RegisteredModels["model_two"].CreateRegisteredModelRequest)
	assert.Equal(t, ml.Experiment{Name: "experiment_one"}, *res.Experiments["experiment_one"].Experiment)
	assert.Equal(t, serving.CreateServingEndpoint{Name: "skynet"}, *res.ModelServingEndpoints["model_serving_one"].CreateServingEndpoint)
}
Loading