Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add protos for bundle telemetry #2209

Merged
merged 9 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions libs/telemetry/api.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package telemetry

// RequestBody is the request body type bindings for the /telemetry-ext API endpoint.
type RequestBody struct {
// Timestamp in millis for when the log was uploaded.
UploadTime int64 `json:"uploadTime"`
shreyas-goenka marked this conversation as resolved.
Show resolved Hide resolved

// DO NOT USE. This is the legacy field for logging in usage logs (not lumberjack).
// We keep this around because the API endpoint works only if this field is serialized
// to an empty array.
Items []string `json:"items"`

// JSON encoded strings containing the proto logs. Since it's represented as a
// string here, the values here end up being double JSON encoded in the final
// request body.
//
// Any logs here will be logged in our lumberjack tables as long as a corresponding
// protobuf is defined in universe.
ProtoLogs []string `json:"protoLogs"`
}

// ResponseBody is the response body type bindings for the /telemetry-ext API endpoint.
type ResponseBody struct {
Errors []LogError `json:"errors"`
NumProtoSuccess int64 `json:"numProtoSuccess"`
}

type LogError struct {
Message string `json:"message"`
ErrorType string `json:"errorType"`
}
2 changes: 2 additions & 0 deletions libs/telemetry/protos/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The types in this package are equivalent to the lumberjack protos defined in Universe.
You can find all lumberjack protos for the Databricks CLI in the `proto/logs/frontend/databricks_cli` directory.
77 changes: 77 additions & 0 deletions libs/telemetry/protos/bundle_deploy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package protos

type BundleDeployEvent struct {
// UUID associated with the bundle itself. Set in the `bundle.uuid` field in the bundle configuration.
BundleUuid string `json:"bundle_uuid,omitempty"`

ResourceCount int64 `json:"resource_count,omitempty"`
ResourceJobCount int64 `json:"resource_job_count,omitempty"`
ResourcePipelineCount int64 `json:"resource_pipeline_count,omitempty"`
ResourceModelCount int64 `json:"resource_model_count,omitempty"`
ResourceExperimentCount int64 `json:"resource_experiment_count,omitempty"`
ResourceModelServingEndpointCount int64 `json:"resource_model_serving_endpoint_count,omitempty"`
ResourceRegisteredModelCount int64 `json:"resource_registered_model_count,omitempty"`
ResourceQualityMonitorCount int64 `json:"resource_quality_monitor_count,omitempty"`
ResourceSchemaCount int64 `json:"resource_schema_count,omitempty"`
ResourceVolumeCount int64 `json:"resource_volume_count,omitempty"`
ResourceClusterCount int64 `json:"resource_cluster_count,omitempty"`
ResourceDashboardCount int64 `json:"resource_dashboard_count,omitempty"`
ResourceAppCount int64 `json:"resource_app_count,omitempty"`

// IDs of resources managed by the bundle. Some resources like volumes or schemas
// do not expose a numerical or UUID identifier and are tracked by name. Those
// resources are not tracked here since the names are PII.
ResourceJobIDs []string `json:"resource_job_ids,omitempty"`
ResourcePipelineIDs []string `json:"resource_pipeline_ids,omitempty"`
ResourceClusterIDs []string `json:"resource_cluster_ids,omitempty"`
ResourceDashboardIDs []string `json:"resource_dashboard_ids,omitempty"`

Experimental *BundleDeployExperimental `json:"experimental,omitempty"`
}

// These metrics are experimental and are often added in an adhoc manner. There
// are no guarantees for these metrics and they maybe removed in the future without
// any notice.
type BundleDeployExperimental struct {
// Number of configuration files in the bundle.
ConfigurationFileCount int64 `json:"configuration_file_count,omitempty"`

// Size in bytes of the Terraform state file
TerraformStateSizeBytes int64 `json:"terraform_state_size_bytes,omitempty"`

// Number of variables in the bundle
VariableCount int64 `json:"variable_count,omitempty"`
ComplexVariableCount int64 `json:"complex_variable_count,omitempty"`
LookupVariableCount int64 `json:"lookup_variable_count,omitempty"`

// Number of targets in the bundle
TargetCount int64 `json:"target_count,omitempty"`

// Whether a field is set or not. If a configuration field is not present in this
// map then it is not tracked by this field.
// Keys are the full path of the field in the configuration tree.
// Examples: "bundle.terraform.exec_path", "bundle.git.branch" etc.
SetFields []BoolMapEntry `json:"set_fields,omitempty"`

// Values for boolean configuration fields like `experimental.python_wheel_wrapper`
// We don't need to define protos to track boolean values and can simply write those
// values to this map to track them.
BoolValues []BoolMapEntry `json:"bool_values,omitempty"`

BundleMode BundleMode `json:"bundle_mode,omitempty"`

WorkspaceArtifactPathType BundleDeployArtifactPathType `json:"workspace_artifact_path_type,omitempty"`

// Execution time per mutator for a selected subset of mutators.
BundleMutatorExecutionTimeMs []IntMapEntry `json:"bundle_mutator_execution_time_ms,omitempty"`
}

type BoolMapEntry struct {
Key string `json:"key,omitempty"`
Value bool `json:"value,omitempty"`
}

type IntMapEntry struct {
Key string `json:"key,omitempty"`
Value int64 `json:"value,omitempty"`
}
37 changes: 37 additions & 0 deletions libs/telemetry/protos/bundle_init.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package protos

type BundleInitEvent struct {
// UUID associated with the DAB itself. This is serialized into the DAB
// when a user runs `databricks bundle init` and all subsequent deployments of
// that DAB can then be associated with this init event.
BundleUuid string `json:"bundle_uuid,omitempty"`

// Name of the template initialized when the user ran `databricks bundle init`
// This is only populated when the template is a first party template like
// mlops-stacks or default-python.
TemplateName string `json:"template_name,omitempty"`

// Arguments used by the user to initialize the template. Only enum
// values will be set here by the Databricks CLI.
//
// We use a generic map representation here because a bundle template's args are
// managed in the template itself and maintaining a copy typed schema for it here
// will be untenable in the long term.
TemplateEnumArgs []BundleInitTemplateEnumArg `json:"template_enum_args,omitempty"`
}

type BundleInitTemplateEnumArg struct {
// Valid key values for the template. These correspond to the keys specified in
// the "properties" section of the `databricks_template_schema.json` file.
//
// Note: `databricks_template_schema.json` contains a JSON schema type specification
// for the arguments that the template accepts.
Key string `json:"key"`

// Value that the user set for the field. This is only populated for properties
// that have the "enum" field specified in the JSON schema type specification.
//
// The Databricks CLI ensures that the value here is one of the "enum" values from
// the template specification.
Value string `json:"value"`
}
35 changes: 35 additions & 0 deletions libs/telemetry/protos/databricks_cli_log.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package protos

type ExecutionContext struct {
// UUID generated by the CLI for every CLI command run. This is also set in the HTTP user
// agent under the key "cmd-exec-id" and can be used to correlate frontend_log table
// with the http_access_log table.
CmdExecID string `json:"cmd_exec_id,omitempty"`

// Version of the Databricks CLI used.
Version string `json:"version,omitempty"`

// Command that was run by the user. Eg: bundle_deploy, fs_cp etc.
Command string `json:"command,omitempty"`

// Lowercase string name for the operating system. Same value
// as the one set in `runtime.GOOS` in Golang.
OperatingSystem string `json:"operating_system,omitempty"`

// Version of DBR from which CLI is being run.
// Only set when the CLI is being run from a Databricks cluster.
DbrVersion string `json:"dbr_version,omitempty"`

// If true, the CLI is being run from a Databricks notebook / cluster web terminal.
FromWebTerminal bool `json:"from_web_terminal,omitempty"`

// Time taken for the CLI command to execute.
ExecutionTimeMs int64 `json:"execution_time_ms,omitempty"`

// Exit code of the CLI command.
ExitCode int64 `json:"exit_code,omitempty"`
}

type CliTestEvent struct {
Name DummyCliEnum `json:"name,omitempty"`
}
26 changes: 26 additions & 0 deletions libs/telemetry/protos/enum.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package protos

type DummyCliEnum string

const (
DummyCliEnumUnspecified DummyCliEnum = "DUMMY_CLI_ENUM_UNSPECIFIED"
DummyCliEnumValue1 DummyCliEnum = "VALUE1"
DummyCliEnumValue2 DummyCliEnum = "VALUE2"
DummyCliEnumValue3 DummyCliEnum = "VALUE3"
)

type BundleMode string

const (
BundleModeUnspecified BundleMode = "TYPE_UNSPECIFIED"
BundleModeDevelopment BundleMode = "DEVELOPMENT"
BundleModeProduction BundleMode = "PRODUCTION"
)

type BundleDeployArtifactPathType string

const (
BundleDeployArtifactPathTypeUnspecified BundleDeployArtifactPathType = "TYPE_UNSPECIFIED"
BundleDeployArtifactPathTypeWorkspace BundleDeployArtifactPathType = "WORKSPACE_FILE_SYSTEM"
BundleDeployArtifactPathTypeVolume BundleDeployArtifactPathType = "UC_VOLUME"
)
22 changes: 22 additions & 0 deletions libs/telemetry/protos/frontend_log.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package protos

// This corresponds to the FrontendLog lumberjack proto in universe.
// FrontendLog is the top-level struct for any client-side logs at Databricks.
type FrontendLog struct {
// A UUID for the log event generated from the CLI.
FrontendLogEventID string `json:"frontend_log_event_id,omitempty"`
shreyas-goenka marked this conversation as resolved.
Show resolved Hide resolved

Entry FrontendLogEntry `json:"entry,omitempty"`
}

type FrontendLogEntry struct {
DatabricksCliLog DatabricksCliLog `json:"databricks_cli_log,omitempty"`
}

type DatabricksCliLog struct {
ExecutionContext *ExecutionContext `json:"execution_context,omitempty"`

CliTestEvent *CliTestEvent `json:"cli_test_event,omitempty"`
BundleInitEvent *BundleInitEvent `json:"bundle_init_event,omitempty"`
BundleDeployEvent *BundleDeployEvent `json:"bundle_deploy_event,omitempty"`
}