From 922fb377353057f66a8e4cddef5db2fd272db125 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 20 Dec 2024 16:59:34 +0100 Subject: [PATCH] Update python-default template to use presets: catalog/schema --- .../dbt-sql/databricks_template_schema.json | 2 +- .../databricks_template_schema.json | 42 ++++++++-- .../{{.project_name}}/databricks.yml.tmpl | 15 +++- .../resources/{{.project_name}}.job.yml.tmpl | 6 +- .../{{.project_name}}.pipeline.yml.tmpl | 7 -- .../scratch/exploration.ipynb.tmpl | 79 +++++++++++++++++-- .../{{.project_name}}/src/notebook.ipynb.tmpl | 70 ++++++++++++++-- .../src/{{.project_name}}/main.py.tmpl | 34 ++++++-- .../databricks_template_schema.json | 2 +- 9 files changed, 216 insertions(+), 41 deletions(-) diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json index cccf145dc5..bb512153f4 100644 --- a/libs/template/templates/dbt-sql/databricks_template_schema.json +++ b/libs/template/templates/dbt-sql/databricks_template_schema.json @@ -45,7 +45,7 @@ "default": "default", "pattern": "^\\w+$", "pattern_match_failure_message": "Invalid schema name.", - "description": "\nPlease provide an initial schema during development.\ndefault_schema", + "description": "\nPlease provide a default schema during development.\ndefault_schema", "order": 5 } }, diff --git a/libs/template/templates/default-python/databricks_template_schema.json b/libs/template/templates/default-python/databricks_template_schema.json index d53bad91ab..461aaa0c46 100644 --- a/libs/template/templates/default-python/databricks_template_schema.json +++ b/libs/template/templates/default-python/databricks_template_schema.json @@ -4,7 +4,7 @@ "project_name": { "type": "string", "default": "my_project", - "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project", + "description": "\nPlease provide a unique name for this 
project.\nproject_name", "order": 1, "pattern": "^[A-Za-z0-9_]+$", "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." @@ -13,23 +13,55 @@ "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'", + "description": "\nWould you like to include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'?", "order": 2 }, "include_dlt": { "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'", + "description": "Would you like to include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'?", "order": 3 }, "include_python": { "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'", + "description": "Would you like to include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'?", "order": 4 + }, + "default_catalog": { + "type": "string", + "default": "{{default_catalog}}", + "pattern": "^\\w*$", + "pattern_match_failure_message": "Invalid catalog name.", + "description": "\nPlease provide an initial catalog{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}.\ndefault_catalog", + "order": 5 + }, + "personal_schemas": { + "type": "string", + "description": "\nWould you like to use a personal schema for each user working on this project? 
(e.g., 'catalog.{{short_name}}')\npersonal_schemas", + "enum": [ + "yes, use a schema based on the current user name during development", + "no, use a shared schema during development" + ], + "order": 6 + }, + "shared_schema": { + "skip_prompt_if": { + "properties": { + "personal_schemas": { + "const": "yes, use a schema based on the current user name during development" + } + } + }, + "type": "string", + "default": "default", + "pattern": "^\\w+$", + "pattern_match_failure_message": "Invalid schema name.", + "description": "\nPlease provide a default schema during development.\ndefault_schema", + "order": 7 } }, - "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." + "success_message": "\nWorkspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml').\nworkspace_host: {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." 
} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl index c42b822a8d..421fe5014f 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl @@ -6,6 +6,13 @@ bundle: include: - resources/*.yml +{{- $dev_schema := .shared_schema }} +{{- $prod_schema := .shared_schema }} +{{- if (regexp "^yes").MatchString .personal_schemas}} + {{- $dev_schema = "${workspace.current_user.short_name}"}} + {{- $prod_schema = "default"}} +{{- end}} + targets: dev: # The default target uses 'mode: development' to create a development copy. @@ -16,6 +23,9 @@ targets: default: true workspace: host: {{workspace_host}} + presets: + catalog: {{.default_catalog}} + schema: {{$dev_schema}} prod: mode: production @@ -26,5 +36,6 @@ targets: permissions: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} level: CAN_MANAGE - run_as: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + presets: + catalog: {{.default_catalog}} + schema: {{$prod_schema}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl index 5211e3894b..0ea69a75ae 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl @@ -16,16 +16,12 @@ resources: interval: 1 unit: DAYS - {{- if not is_service_principal}} - + {{if not is_service_principal -}} email_notifications: on_failure: - {{user_name}} - {{else}} - {{end -}} - tasks: {{- if eq .include_notebook 
"yes" }} - task_key: notebook_task diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl index 50f11fe2cc..c3f94cb1c8 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl @@ -3,13 +3,6 @@ resources: pipelines: {{.project_name}}_pipeline: name: {{.project_name}}_pipeline - {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} - ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: - # catalog: catalog_name - {{- else}} - catalog: {{default_catalog}} - {{- end}} - target: {{.project_name}}_${bundle.target} libraries: - notebook: path: ../src/dlt_pipeline.ipynb diff --git a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl index 42164dff07..adb353c58c 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl @@ -27,15 +27,25 @@ }, "outputs": [], "source": [ - {{- if (eq .include_python "yes") }} + {{- if (eq .include_python "yes") }} "import sys\n", "sys.path.append('../src')\n", "from {{.project_name}} import main\n", "\n", - "main.get_taxis(spark).show(10)" - {{else}} - "spark.range(10)" - {{end -}} + {{- /* We can use the short form here without 'dbutils.text()' since the widgets are defined in the metadata below. 
*/}} + "catalog = dbutils.widgets.get('catalog')\n", + "schema = dbutils.widgets.get('schema')\n", + "spark.sql(f'USE {catalog}.{schema}')\n", + "\n", + "spark.sql('SELECT * FROM example').show(10)" + {{- else}} + "# Load default catalog and schema as widget and set their values as the default catalog / schema\n", + "catalog = dbutils.widgets.get('catalog')\n", + "schema = dbutils.widgets.get('schema')\n", + "spark.sql(f'USE {catalog}.{schema}')\n", + "\n", + "spark.sql('SELECT * FROM example').show(10)" + {{- end}} ] } ], @@ -46,8 +56,65 @@ "notebookMetadata": { "pythonIndentUnit": 2 }, - "notebookName": "ipynb-notebook", - "widgets": {} + "notebookName": "exploration", + "widgets": { + "catalog": { + "currentValue": "{{.default_catalog}}", + "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f1234", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "{{.default_catalog}}", + "label": null, + "name": "catalog", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "{{.default_catalog}}", + "label": null, + "name": "catalog", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, +{{- $dev_schema := .shared_schema }} +{{- if (regexp "^yes").MatchString .personal_schemas}} + {{- $dev_schema = short_name}} +{{- end}} + "schema": { + "currentValue": "{{$dev_schema}}", + "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f5678", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "{{$dev_schema}}", + "label": null, + "name": "schema", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "{{$dev_schema}}", + "label": null, + "name": "schema", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + } + } }, "kernelspec": { "display_name": 
"Python 3", diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl index 6782a053ba..0924e60f35 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -23,8 +23,11 @@ "metadata": {}, "outputs": [], "source": [ - "%load_ext autoreload\n", - "%autoreload 2" + "# Load default catalog and schema as widget and set their values as the default catalog / schema\n", + {{- /* We can use the short form here without 'dbutils.text()' since the widgets are defined in the metadata below. */}} + "catalog = dbutils.widgets.get('catalog')\n", + "schema = dbutils.widgets.get('schema')\n", + "spark.sql(f'USE {catalog}.{schema}')" ] }, { @@ -47,9 +50,9 @@ {{- if (eq .include_python "yes") }} "from {{.project_name}} import main\n", "\n", - "main.get_taxis(spark).show(10)" + "main.create_example_table()" {{else}} - "spark.range(10)" + "spark.sql(\"CREATE OR REPLACE TABLE example AS SELECT 'example table' AS text_column\")" {{end -}} ] } @@ -62,7 +65,64 @@ "pythonIndentUnit": 2 }, "notebookName": "notebook", - "widgets": {} + "widgets": { + "catalog": { + "currentValue": "{{.default_catalog}}", + "nuid": "3965fc9c-8080-45b1-bee3-f75cef7685b4", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "{{.default_catalog}}", + "label": null, + "name": "catalog", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "{{.default_catalog}}", + "label": null, + "name": "catalog", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, +{{- $dev_schema := .shared_schema }} +{{- if (regexp "^yes").MatchString .personal_schemas}} + {{- $dev_schema 
= short_name}} +{{- end}} + "schema": { + "currentValue": "{{$dev_schema}}", + "nuid": "6ec0d70f-39bf-4859-a510-02c3e3d59bff", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "{{$dev_schema}}", + "label": null, + "name": "schema", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "{{$dev_schema}}", + "label": null, + "name": "schema", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + } + } }, "kernelspec": { "display_name": "Python 3", diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl index c514c6dc5d..e79920a9e2 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -1,21 +1,39 @@ from pyspark.sql import SparkSession, DataFrame +import argparse -def get_taxis(spark: SparkSession) -> DataFrame: - return spark.read.table("samples.nyctaxi.trips") - - -# Create a new Databricks Connect session. If this fails, -# check that you have configured Databricks Connect correctly. -# See https://docs.databricks.com/dev-tools/databricks-connect.html. def get_spark() -> SparkSession: + """ + Create a new Databricks Connect session. If this fails, + check that you have configured Databricks Connect correctly. + See https://docs.databricks.com/dev-tools/databricks-connect.html. 
+ """ try: from databricks.connect import DatabricksSession return DatabricksSession.builder.getOrCreate() except ImportError: return SparkSession.builder.getOrCreate() +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + +def create_example_table(): + """ + Create a table called 'example' in the default catalog and schema. + """ + get_spark().sql("CREATE OR REPLACE TABLE example AS SELECT 'example table' AS text_column") + def main(): - get_taxis(get_spark()).show(5) + # Set the catalog and schema for the current session. + # In the default template, these parameters are set + # using the 'catalog' and 'schema' presets in databricks.yml. + parser = argparse.ArgumentParser() + parser.add_argument('--catalog', required=True) + parser.add_argument('--schema', required=True) + args, unknown = parser.parse_known_args() + spark = get_spark() + spark.sql(f"USE {args.catalog}.{args.schema}") + + create_example_table() if __name__ == '__main__': main() diff --git a/libs/template/templates/default-sql/databricks_template_schema.json b/libs/template/templates/default-sql/databricks_template_schema.json index 113cbef642..81fa7e2d22 100644 --- a/libs/template/templates/default-sql/databricks_template_schema.json +++ b/libs/template/templates/default-sql/databricks_template_schema.json @@ -45,7 +45,7 @@ "default": "default", "pattern": "^\\w+$", "pattern_match_failure_message": "Invalid schema name.", - "description": "\nPlease provide an initial schema during development.\ndefault_schema", + "description": "\nPlease provide a default schema during development.\ndefault_schema", "order": 5 } },