Skip to content

Commit

Permalink
Update python-default template to use presets: catalog/schema
Browse files Browse the repository at this point in the history
  • Loading branch information
lennartkats-db committed Dec 20, 2024
1 parent 4236e71 commit 922fb37
Show file tree
Hide file tree
Showing 9 changed files with 216 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"default": "default",
"pattern": "^\\w+$",
"pattern_match_failure_message": "Invalid schema name.",
"description": "\nPlease provide an initial schema during development.\ndefault_schema",
"description": "\nPlease provide a default schema during development.\ndefault_schema",
"order": 5
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"project_name": {
"type": "string",
"default": "my_project",
"description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project",
"description": "\nPlease provide a unique name for this project.\nproject_name",
"order": 1,
"pattern": "^[A-Za-z0-9_]+$",
"pattern_match_failure_message": "Name must consist of letters, numbers, and underscores."
Expand All @@ -13,23 +13,55 @@
"type": "string",
"default": "yes",
"enum": ["yes", "no"],
"description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'",
"description": "\nWould you like to include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'?",
"order": 2
},
"include_dlt": {
"type": "string",
"default": "yes",
"enum": ["yes", "no"],
"description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'",
"description": "Would you like to include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'?",
"order": 3
},
"include_python": {
"type": "string",
"default": "yes",
"enum": ["yes", "no"],
"description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
"description": "Would you like to include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'?",
"order": 4
},
"default_catalog": {
"type": "string",
"default": "{{default_catalog}}",
"pattern": "^\\w*$",
"pattern_match_failure_message": "Invalid catalog name.",
"description": "\nPlease provide an initial catalog{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}.\ndefault_catalog",
"order": 5
},
"personal_schemas": {
"type": "string",
"description": "\nWould you like to use a personal schema for each user working on this project? (e.g., 'catalog.{{short_name}}')\npersonal_schemas",
"enum": [
"yes, use a schema based on the current user name during development",
"no, use a shared schema during development"
],
"order": 6
},
"shared_schema": {
"skip_prompt_if": {
"properties": {
"personal_schemas": {
"const": "yes, use a schema based on the current user name during development"
}
}
},
"type": "string",
"default": "default",
"pattern": "^\\w+$",
"pattern_match_failure_message": "Invalid schema name.",
"description": "\nPlease provide a default schema during development.\ndefault_schema",
"order": 7
}
},
"success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
"success_message": "\nWorkspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml').\nworkspace_host: {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ bundle:
include:
- resources/*.yml

{{- $dev_schema := .shared_schema }}
{{- $prod_schema := .shared_schema }}
{{- if (regexp "^yes").MatchString .personal_schemas}}
{{- $dev_schema = "${workspace.current_user.short_name}"}}
{{- $prod_schema = "default"}}
{{- end}}

targets:
dev:
# The default target uses 'mode: development' to create a development copy.
Expand All @@ -16,6 +23,9 @@ targets:
default: true
workspace:
host: {{workspace_host}}
presets:
catalog: {{.default_catalog}}
schema: {{$dev_schema}}

prod:
mode: production
Expand All @@ -26,5 +36,6 @@ targets:
permissions:
- {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
level: CAN_MANAGE
run_as:
{{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
presets:
catalog: {{.default_catalog}}
schema: {{$prod_schema}}
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,12 @@ resources:
interval: 1
unit: DAYS

{{- if not is_service_principal}}

{{if not is_service_principal -}}
email_notifications:
on_failure:
- {{user_name}}

{{else}}

{{end -}}

tasks:
{{- if eq .include_notebook "yes" }}
- task_key: notebook_task
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,6 @@ resources:
pipelines:
{{.project_name}}_pipeline:
name: {{.project_name}}_pipeline
{{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}}
## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
# catalog: catalog_name
{{- else}}
catalog: {{default_catalog}}
{{- end}}
target: {{.project_name}}_${bundle.target}
libraries:
- notebook:
path: ../src/dlt_pipeline.ipynb
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,25 @@
},
"outputs": [],
"source": [
{{- if (eq .include_python "yes") }}
{{- if (eq .include_python "yes") }}
"import sys\n",
"sys.path.append('../src')\n",
"from {{.project_name}} import main\n",
"\n",
"main.get_taxis(spark).show(10)"
{{else}}
"spark.range(10)"
{{end -}}
{{- /* We can use the short form here without 'dbutils.text()' since the widgets are defined in the metadata below. */}}
"catalog = dbutils.widgets.get('catalog')\n",
"schema = dbutils.widgets.get('schema')\n",
"spark.sql(f'USE {catalog}.{schema}')\n",
"\n",
"spark.sql('SELECT * FROM example').show(10)"
{{- else}}
"# Load default catalog and schema as widget and set their values as the default catalog / schema\n",
"catalog = dbutils.widgets.get('catalog')\n",
"schema = dbutils.widgets.get('schema')\n",
"spark.sql(f'USE {catalog}.{schema}')\n",
"\n",
"spark.sql('SELECT * FROM example').show(10)"
{{- end}}
]
}
],
Expand All @@ -46,8 +56,63 @@
"notebookMetadata": {
"pythonIndentUnit": 2
},
"notebookName": "ipynb-notebook",
"widgets": {}
"notebookName": "exploration",
"widgets": {
"catalog": {
"currentValue": "{{.default_catalog}}",
"nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f1234",
"typedWidgetInfo": {
"autoCreated": false,
"defaultValue": "{{.default_catalog}}",
"label": null,
"name": "catalog",
"options": {
"widgetDisplayType": "Text",
"validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
"widgetType": "text",
"defaultValue": "{{.default_catalog}}",
"label": null,
"name": "catalog",
"options": {
"widgetType": "text",
"autoCreated": null,
"validationRegex": null
}
}
},
{{- /* Default value of the 'schema' widget: the shared schema, or the current user's short name when personal schemas were selected. */}}
{{- $dev_schema := .shared_schema }}
{{- if (regexp "^yes").MatchString .personal_schemas}}
{{- /* Call the short_name helper directly: a quoted action inside a string constant is not expanded and would be emitted verbatim. */}}
{{- $dev_schema = short_name}}
{{- end}}
"schema": {
"currentValue": "{{$dev_schema}}",
"nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f5678",
"typedWidgetInfo": {
"autoCreated": false,
"defaultValue": "{{$dev_schema}}",
"label": null,
"name": "schema",
"options": {
"widgetDisplayType": "Text",
"validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
"widgetType": "text",
"defaultValue": "{{$dev_schema}}",
"label": null,
"name": "schema",
"options": {
"widgetType": "text",
"autoCreated": null,
"validationRegex": null
}
}
}
}
},
"kernelspec": {
"display_name": "Python 3",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
"# Load default catalog and schema as widget and set their values as the default catalog / schema\n",
{{- /* We can use the short form here without 'dbutils.text()' since the widgets are defined in the metadata below. */}}
"catalog = dbutils.widgets.get('catalog')\n",
"schema = dbutils.widgets.get('schema')\n",
"spark.sql(f'USE {catalog}.{schema}')"
]
},
{
Expand All @@ -47,9 +50,9 @@
{{- if (eq .include_python "yes") }}
"from {{.project_name}} import main\n",
"\n",
"main.get_taxis(spark).show(10)"
"main.create_example_table()"
{{else}}
"spark.range(10)"
"spark.sql(\"CREATE OR REPLACE TABLE example AS SELECT 'example table' AS text_column\")"
{{end -}}
]
}
Expand All @@ -62,7 +65,64 @@
"pythonIndentUnit": 2
},
"notebookName": "notebook",
"widgets": {}
"widgets": {
"catalog": {
"currentValue": "{{.default_catalog}}",
"nuid": "3965fc9c-8080-45b1-bee3-f75cef7685b4",
"typedWidgetInfo": {
"autoCreated": false,
"defaultValue": "{{.default_catalog}}",
"label": null,
"name": "catalog",
"options": {
"widgetDisplayType": "Text",
"validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
"widgetType": "text",
"defaultValue": "{{.default_catalog}}",
"label": null,
"name": "catalog",
"options": {
"widgetType": "text",
"autoCreated": null,
"validationRegex": null
}
}
},
{{- /* Default value of the 'schema' widget: the shared schema, or the current user's short name when personal schemas were selected. */}}
{{- $dev_schema := .shared_schema }}
{{- if (regexp "^yes").MatchString .personal_schemas}}
{{- /* Call the short_name helper directly: a quoted action inside a string constant is not expanded and would be emitted verbatim. */}}
{{- $dev_schema = short_name}}
{{- end}}
"schema": {
"currentValue": "{{$dev_schema}}",
"nuid": "6ec0d70f-39bf-4859-a510-02c3e3d59bff",
"typedWidgetInfo": {
"autoCreated": false,
"defaultValue": "{{$dev_schema}}",
"label": null,
"name": "schema",
"options": {
"widgetDisplayType": "Text",
"validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
"widgetType": "text",
"defaultValue": "{{$dev_schema}}",
"label": null,
"name": "schema",
"options": {
"widgetType": "text",
"autoCreated": null,
"validationRegex": null
}
}
}
}
},
"kernelspec": {
"display_name": "Python 3",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,39 @@
from pyspark.sql import SparkSession, DataFrame
import argparse

def get_taxis(spark: SparkSession) -> DataFrame:
return spark.read.table("samples.nyctaxi.trips")


# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
"""
Create a new Databricks Connect session. If this fails,
check that you have configured Databricks Connect correctly.
See https://docs.databricks.com/dev-tools/databricks-connect.html.
"""
try:
from databricks.connect import DatabricksSession
return DatabricksSession.builder.getOrCreate()
except ImportError:
return SparkSession.builder.getOrCreate()

def get_taxis(spark: SparkSession) -> DataFrame:
return spark.read.table("samples.nyctaxi.trips")

def create_example_table():
"""
Create a table called 'example' in the default catalog and schema.
"""
get_spark().sql("CREATE OR REPLACE TABLE example AS SELECT 'example table' AS text_column")

def main():
get_taxis(get_spark()).show(5)
# Set the catalog and schema for the current session.
# In the default template, these parameters are set
# using the 'catalog' and 'schema' presets in databricks.yml.
parser = argparse.ArgumentParser()
parser.add_argument('--catalog', required=True)
parser.add_argument('--schema', required=True)
args, unknown = parser.parse_known_args()
spark = get_spark()
spark.sql(f"USE {args.catalog}.{args.schema}")

create_example_table()

if __name__ == '__main__':
main()
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"default": "default",
"pattern": "^\\w+$",
"pattern_match_failure_message": "Invalid schema name.",
"description": "\nPlease provide an initial schema during development.\ndefault_schema",
"description": "\nPlease provide a default schema during development.\ndefault_schema",
"order": 5
}
},
Expand Down

0 comments on commit 922fb37

Please sign in to comment.