diff --git a/knowledge_base/write_from_job_to_volume/.gitignore b/knowledge_base/write_from_job_to_volume/.gitignore new file mode 100644 index 0000000..a4561ff --- /dev/null +++ b/knowledge_base/write_from_job_to_volume/.gitignore @@ -0,0 +1 @@ +.databricks/ \ No newline at end of file diff --git a/knowledge_base/write_from_job_to_volume/README.md b/knowledge_base/write_from_job_to_volume/README.md new file mode 100644 index 0000000..5194ca5 --- /dev/null +++ b/knowledge_base/write_from_job_to_volume/README.md @@ -0,0 +1,20 @@ +# Save job result to volume + +This example demonstrates how to define and use a Unity Catalog Volume in a Databricks Asset Bundle. + +Specifically, we'll define a `hello_world_job` job which writes "Hello, World!" +to a file in a Unity Catalog Volume. + +The bundle also defines a Volume and the associated Schema to which the Job writes text. + +## Prerequisites + +* Databricks CLI v0.236.0 or above + +## Usage + +Update the `host` field under `workspace` in `databricks.yml` to the Databricks workspace you wish to deploy to. + +Run `databricks bundle deploy` to deploy the job. + +Run `databricks bundle run hello_world_job` to run the job and store the results in the UC Volume. 
diff --git a/knowledge_base/write_from_job_to_volume/databricks.yml b/knowledge_base/write_from_job_to_volume/databricks.yml new file mode 100644 index 0000000..7ae0888 --- /dev/null +++ b/knowledge_base/write_from_job_to_volume/databricks.yml @@ -0,0 +1,12 @@ +bundle: + name: write_from_job_to_volume + +include: + - resources/*.yml + +workspace: + host: https://e2-dogfood.staging.cloud.databricks.com + +targets: + dev: + default: true diff --git a/knowledge_base/write_from_job_to_volume/resources/hello_world.job.yml b/knowledge_base/write_from_job_to_volume/resources/hello_world.job.yml new file mode 100644 index 0000000..36b13ab --- /dev/null +++ b/knowledge_base/write_from_job_to_volume/resources/hello_world.job.yml @@ -0,0 +1,16 @@ +resources: + jobs: + hello_world_job: + name: hello_world_job + + # No job cluster is configured. The job will run on serverless compute. + # You can explicitly configure job compute here if your workspace does + # not have serverless compute enabled. + tasks: + - task_key: hello_world_job_task + notebook_task: + notebook_path: ../src/hello.ipynb + + parameters: + - name: file_path + default: /Volumes/${resources.schemas.hello_world_schema.catalog_name}/${resources.schemas.hello_world_schema.name}/${resources.volumes.my_volume.name}/hello_world.txt diff --git a/knowledge_base/write_from_job_to_volume/resources/hello_world.schema.yml b/knowledge_base/write_from_job_to_volume/resources/hello_world.schema.yml new file mode 100644 index 0000000..e1c0ed6 --- /dev/null +++ b/knowledge_base/write_from_job_to_volume/resources/hello_world.schema.yml @@ -0,0 +1,5 @@ +resources: + schemas: + hello_world_schema: + catalog_name: main + name: ${workspace.current_user.short_name}_hello_world diff --git a/knowledge_base/write_from_job_to_volume/resources/my_volume.volume.yml b/knowledge_base/write_from_job_to_volume/resources/my_volume.volume.yml new file mode 100644 index 0000000..7e47321 --- /dev/null +++ b/knowledge_base/write_from_job_to_volume/resources/my_volume.volume.yml @@ -0,0 +1,9 @@ 
+resources: + volumes: + my_volume: + catalog_name: main + # We use the ${resources.schemas...} interpolation syntax to force the creation + # of the schema before the volume. Usage of the ${resources.schemas...} syntax + # allows Databricks Asset Bundles to form a dependency graph between resources. + schema_name: ${resources.schemas.hello_world_schema.name} + name: my_volume diff --git a/knowledge_base/write_from_job_to_volume/src/hello.ipynb b/knowledge_base/write_from_job_to_volume/src/hello.ipynb new file mode 100644 index 0000000..093e5d4 --- /dev/null +++ b/knowledge_base/write_from_job_to_volume/src/hello.ipynb @@ -0,0 +1,21 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "file_path = dbutils.widgets.get(\"file_path\")\n", + "dbutils.fs.put(file_path, \"Hello, World!\", overwrite=True)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}