Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add legacy option for run_as #1384

Merged
merged 11 commits into from
Apr 22, 2024
13 changes: 13 additions & 0 deletions bundle/config/experimental.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,19 @@ type Experimental struct {
// In this case the configured wheel task will be deployed as a notebook task which install defined wheel in runtime and executes it.
// For more details see https://github.com/databricks/cli/pull/797 and https://github.com/databricks/cli/pull/635
PythonWheelWrapper bool `json:"python_wheel_wrapper,omitempty"`

// Enable legacy run_as behavior. That is:
// - Set the run_as identity as the owner of any pipelines in the bundle.
// - Do not error in the presence of resources that do not support run_as.
// As of April 2024 this includes pipelines and model serving endpoints.
//
// This mode of run_as requires the deploying user to be a workspace and metastore
// admin. Use of this flag is not recommend for new bundles, and it is only provided
// to unblock customers that are stuck due to breaking changes in the run_as behavior
// made in https://github.com/databricks/cli/pull/1233. This flag might
// be removed in the future once we have a proper workaround like allowing IS_OWNER
// as a top-level permission in the DAB.
UseLegacyRunAs bool `json:"use_legacy_run_as,omitempty"`
}

type Command string
Expand Down
70 changes: 61 additions & 9 deletions bundle/config/mutator/run_as.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ package mutator
import (
"context"
"fmt"
"slices"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/databricks-sdk-go/service/jobs"
Expand Down Expand Up @@ -101,19 +103,12 @@ func validateRunAs(b *bundle.Bundle) error {
return nil
}

func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
// Mutator is a no-op if run_as is not specified in the bundle
func setRunAsForJobs(b *bundle.Bundle) {
runAs := b.Config.RunAs
if runAs == nil {
return nil
return
}

// Assert the run_as configuration is valid in the context of the bundle
if err := validateRunAs(b); err != nil {
return diag.FromErr(err)
}

// Set run_as for jobs
for i := range b.Config.Resources.Jobs {
job := b.Config.Resources.Jobs[i]
if job.RunAs != nil {
Expand All @@ -124,6 +119,63 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
UserName: runAs.UserName,
}
}
}

// Legacy behavior of run_as for DLT pipelines. Available under the experimental.use_run_as_legacy flag.
// Only available to unblock customers stuck due to breaking changes in https://github.com/databricks/cli/pull/1233
func setPipelineOwnersToRunAsIdentity(b *bundle.Bundle) {
runAs := b.Config.RunAs
if runAs == nil {
return
}

me := b.Config.Workspace.CurrentUser.UserName
// If user deploying the bundle and the one defined in run_as are the same
// Do not add IS_OWNER permission. Current user is implied to be an owner in this case.
// Otherwise, it will fail due to this bug https://github.com/databricks/terraform-provider-databricks/issues/2407
if runAs.UserName == me || runAs.ServicePrincipalName == me {
return
}

for i := range b.Config.Resources.Pipelines {
pipeline := b.Config.Resources.Pipelines[i]
pipeline.Permissions = slices.DeleteFunc(pipeline.Permissions, func(p resources.Permission) bool {
return (runAs.ServicePrincipalName != "" && p.ServicePrincipalName == runAs.ServicePrincipalName) ||
(runAs.UserName != "" && p.UserName == runAs.UserName)
})
pipeline.Permissions = append(pipeline.Permissions, resources.Permission{
Level: "IS_OWNER",
ServicePrincipalName: runAs.ServicePrincipalName,
UserName: runAs.UserName,
})
}
}

func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
// Mutator is a no-op if run_as is not specified in the bundle
runAs := b.Config.RunAs
if runAs == nil {
return nil
}

if b.Config.Experimental != nil && b.Config.Experimental.UseLegacyRunAs {
setPipelineOwnersToRunAsIdentity(b)
setRunAsForJobs(b)
return diag.Diagnostics{
{
Severity: diag.Warning,
Summary: "You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the DLT pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC.",
Path: dyn.MustPathFromString("experimental.use_legacy_run_as"),
Location: b.Config.GetLocation("experimental.use_legacy_run_as"),
},
}
}

// Assert the run_as configuration is valid in the context of the bundle
if err := validateRunAs(b); err != nil {
return diag.FromErr(err)
}

setRunAsForJobs(b)
return nil
}
68 changes: 68 additions & 0 deletions bundle/tests/run_as/legacy/databricks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
bundle:
name: "run_as"

run_as:
service_principal_name: "my_service_principal"

experimental:
use_legacy_run_as: true

resources:
jobs:
job_one:
name: Job One

tasks:
- task_key: "task_one"
notebook_task:
notebook_path: "./test.py"

job_two:
name: Job Two

tasks:
- task_key: "task_two"
notebook_task:
notebook_path: "./test.py"

job_three:
name: Job Three

run_as:
service_principal_name: "my_service_principal_for_job"

tasks:
- task_key: "task_three"
notebook_task:
notebook_path: "./test.py"

pipelines:
nyc_taxi_pipeline:
name: "nyc taxi loader"

permissions:
- level: CAN_VIEW
service_principal_name: my_service_principal
- level: CAN_VIEW
user_name: my_user_name

libraries:
- notebook:
path: ./dlt/nyc_taxi_loader


models:
model_one:
name: "skynet"

registered_models:
model_two:
name: "skynet (in UC)"

experiments:
experiment_one:
name: "experiment_one"

model_serving_endpoints:
model_serving_one:
name: "skynet"
51 changes: 51 additions & 0 deletions bundle/tests/run_as_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/databricks/databricks-sdk-go/service/catalog"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/ml"
"github.com/databricks/databricks-sdk-go/service/serving"
"github.com/stretchr/testify/assert"
)

Expand Down Expand Up @@ -233,3 +234,53 @@ func TestRunAsErrorNeitherUserOrSpSpecifiedAtTargetOverride(t *testing.T) {
configPath := filepath.FromSlash("run_as/not_allowed/neither_sp_nor_user_override/override.yml")
assert.EqualError(t, err, fmt.Sprintf("run_as section must specify exactly one identity. Neither service_principal_name nor user_name is specified at %s:4:12", configPath))
}

func TestLegacyRunAs(t *testing.T) {
b := load(t, "./run_as/legacy")

ctx := context.Background()
bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
b.Config.Workspace.CurrentUser = &config.User{
User: &iam.User{
UserName: "[email protected]",
},
}
return nil
})

diags := bundle.Apply(ctx, b, mutator.SetRunAs())
assert.NoError(t, diags.Error())

assert.Len(t, b.Config.Resources.Jobs, 3)
jobs := b.Config.Resources.Jobs

// job_one and job_two should have the same run_as identity as the bundle.
assert.NotNil(t, jobs["job_one"].RunAs)
assert.Equal(t, "my_service_principal", jobs["job_one"].RunAs.ServicePrincipalName)
assert.Equal(t, "", jobs["job_one"].RunAs.UserName)

assert.NotNil(t, jobs["job_two"].RunAs)
assert.Equal(t, "my_service_principal", jobs["job_two"].RunAs.ServicePrincipalName)
assert.Equal(t, "", jobs["job_two"].RunAs.UserName)

// job_three should retain it's run_as identity.
assert.NotNil(t, jobs["job_three"].RunAs)
assert.Equal(t, "my_service_principal_for_job", jobs["job_three"].RunAs.ServicePrincipalName)
assert.Equal(t, "", jobs["job_three"].RunAs.UserName)

// Assert owner permissions for pipelines are set.
pipelines := b.Config.Resources.Pipelines
assert.Len(t, pipelines["nyc_taxi_pipeline"].Permissions, 2)

assert.Equal(t, "CAN_VIEW", pipelines["nyc_taxi_pipeline"].Permissions[0].Level)
assert.Equal(t, "my_user_name", pipelines["nyc_taxi_pipeline"].Permissions[0].UserName)

assert.Equal(t, "IS_OWNER", pipelines["nyc_taxi_pipeline"].Permissions[1].Level)
assert.Equal(t, "my_service_principal", pipelines["nyc_taxi_pipeline"].Permissions[1].ServicePrincipalName)

// Assert other resources are not affected.
assert.Equal(t, ml.Model{Name: "skynet"}, *b.Config.Resources.Models["model_one"].Model)
assert.Equal(t, catalog.CreateRegisteredModelRequest{Name: "skynet (in UC)"}, *b.Config.Resources.RegisteredModels["model_two"].CreateRegisteredModelRequest)
assert.Equal(t, ml.Experiment{Name: "experiment_one"}, *b.Config.Resources.Experiments["experiment_one"].Experiment)
assert.Equal(t, serving.CreateServingEndpoint{Name: "skynet"}, *b.Config.Resources.ModelServingEndpoints["model_serving_one"].CreateServingEndpoint)
}
Loading