From 0e6f4d817136e9746fd017b587ba14744673b9b2 Mon Sep 17 00:00:00 2001 From: Alonso Astroza Tagle Date: Sun, 28 Apr 2024 23:50:46 -0400 Subject: [PATCH 01/15] python_version in Image declaration (#720) --- 06_gpu_and_ml/llm-serving/vllm_inference.py | 2 +- 06_gpu_and_ml/llm-serving/vllm_mixtral.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/06_gpu_and_ml/llm-serving/vllm_inference.py b/06_gpu_and_ml/llm-serving/vllm_inference.py index c3d0375c4..9f6b23a6a 100644 --- a/06_gpu_and_ml/llm-serving/vllm_inference.py +++ b/06_gpu_and_ml/llm-serving/vllm_inference.py @@ -56,7 +56,7 @@ def download_model_to_image(model_dir, model_name): # We’ll start from Modal's Debian slim image. # Then we’ll use `run_function` with `download_model_to_image` to write the model into the container image. image = ( - modal.Image.debian_slim() + modal.Image.debian_slim(python_version="3.10") .pip_install( "vllm==0.4.0.post1", "torch==2.1.2", diff --git a/06_gpu_and_ml/llm-serving/vllm_mixtral.py b/06_gpu_and_ml/llm-serving/vllm_mixtral.py index 9bb9471ba..57618ae28 100644 --- a/06_gpu_and_ml/llm-serving/vllm_mixtral.py +++ b/06_gpu_and_ml/llm-serving/vllm_mixtral.py @@ -63,7 +63,7 @@ def download_model_to_image(model_dir, model_name, model_revision): # the model are saved within the container image. vllm_image = ( - modal.Image.debian_slim() + modal.Image.debian_slim(python_version="3.10") .pip_install( "vllm==0.4.0.post1", "torch==2.1.2", From dabdf155706bf8f56b3a9b4ebc15afedd95030a3 Mon Sep 17 00:00:00 2001 From: Charles Frye Date: Sun, 28 Apr 2024 22:45:00 -0700 Subject: [PATCH 02/15] Clean up instructor example (#722) * removes extra inference file * cleans up instructor example --- .../llm-structured/instructor/inference.py | 79 --------- .../instructor/instructor_generate.py | 161 ++++++++++-------- 2 files changed, 90 insertions(+), 150 deletions(-) delete mode 100644 06_gpu_and_ml/llm-structured/instructor/inference.py diff --git a/06_gpu_and_ml/llm-structured/instructor/inference.py b/06_gpu_and_ml/llm-structured/instructor/inference.py deleted file mode 100644 index d8e765764..000000000 --- a/06_gpu_and_ml/llm-structured/instructor/inference.py +++ /dev/null @@ -1,79 +0,0 @@ -# # Fast inference with vLLM (Mistral 7B) -# -# In this example, we show how to run basic inference, using [`vLLM`](https://github.com/vllm-project/vllm) -# to take advantage of PagedAttention, which speeds up sequential inferences with optimized key-value caching. - -import os -import subprocess - -from modal import App, Image, Secret, gpu, web_server - -MODEL_DIR = "/model" -BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.1" - - -# ## Define a container image - - -# We want to create a Modal image which has the model weights pre-saved to a directory. The benefit of this -# is that the container no longer has to re-download the model from Huggingface - instead, it will take -# advantage of Modal's internal filesystem for faster cold starts. -# -# ### Download the weights -# We can download the model to a particular directory using the HuggingFace utility function `snapshot_download`. -# -# Tip: avoid using global variables in this function. Changes to code outside this function will not be detected and the download step will not re-run. 
-def download_model_to_folder(): - from huggingface_hub import snapshot_download - from transformers.utils import move_cache - - os.makedirs(MODEL_DIR, exist_ok=True) - - snapshot_download( - BASE_MODEL, - local_dir=MODEL_DIR, - ignore_patterns=["*.pt", "*.bin"], # Using safetensors - ) - move_cache() - - -# ### Image definition -# We'll start from a recommended Docker Hub image and install `vLLM`. -# Then we'll use `run_function` to run the function defined above to ensure the weights of -# the model are saved within the container image. -image = ( - Image.from_registry( - "nvidia/cuda:12.1.1-devel-ubuntu22.04", add_python="3.10" - ) - .pip_install( - "vllm==0.2.5", - "huggingface_hub==0.19.4", - "hf-transfer==0.1.4", - "torch==2.1.2", - ) - # Use the barebones hf-transfer package for maximum download speeds. No progress bar, but expect 700MB/s. - .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) - .run_function( - download_model_to_folder, - secrets=[Secret.from_name("huggingface")], - timeout=60 * 20, - ) -) - -app = App( - "vllm-inference-openai-compatible", image=image -) # Note: prior to April 2024, "app" was called "stub" - - -GPU_CONFIG = gpu.A100(count=1) # 40GB A100 by default - - -@app.function( - allow_concurrent_inputs=100, - gpu=GPU_CONFIG, -) -@web_server(8000, startup_timeout=90) -def openai_compatible_server(): - target = BASE_MODEL - cmd = f"python -m vllm.entrypoints.openai.api_server --model {target} --host 0.0.0.0 --port 8000" - subprocess.Popen(cmd, shell=True) diff --git a/06_gpu_and_ml/llm-structured/instructor/instructor_generate.py b/06_gpu_and_ml/llm-structured/instructor/instructor_generate.py index ca6d66fec..242f419d3 100644 --- a/06_gpu_and_ml/llm-structured/instructor/instructor_generate.py +++ b/06_gpu_and_ml/llm-structured/instructor/instructor_generate.py @@ -3,47 +3,46 @@ # --- # # Structured Data Extraction using `instructor` # -# This example demonstrates how to use the `instructor` library to extract structured data from unstructured text. +# This example demonstrates how to use the `instructor` library to extract structured, schematized data from unstructured text. # -# Structured output is a powerful but under-appreciated feature of LLMs, -# because it makes it easier to connect LLMs to other software, -# for example enabling the ingestion of unstructured data into structured databases. +# Structured output is a powerful but under-appreciated feature of LLMs. +# Structured output allows LLMs and multimodal models to connect to traditional software, +# for example enabling the ingestion of unstructured data like text files into structured databases. +# Applied properly, it makes them an extreme example of the [Robustness Principle](https://en.wikipedia.org/wiki/Robustness_principle) +# Jon Postel formulated for TCP: "Be conservative in what you send, be liberal in what you accept". # -# The unstructured data in this example is the code from the examples in the Modal examples repository -- -# including this one! -# -# We use this exact code to monitor the coverage of the examples -# and to make decisions about which examples to write next! +# The unstructured data used in this example code is the code from the examples in the Modal examples repository -- +# including this example's code! # # The output includes a JSONL file containing, on each line, the metadata extracted from the code in one example. # This can be consumed downstream by other software systems, like a database or a dashboard. 
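# As a purely hypothetical illustration (not actual output of this pipeline), one line of
# that JSONL file might look something like:
#
#     {"summary": "Renders a spinning logo with Blender on many GPUs.", "has_thorough_explanation": true,
#      "domains": ["parallel_computing"], "difficulty": 2, "freshness": 0.8, "filename": "blender_video.py"}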
# -# We include in this folder a Jupyter notebook with some basic analyses. -# # ## Environment setup # -# We setup the environment our code will run in first. +# We set up the environment our code will run in first. # In Modal, we define environments via [container images](https://modal.com/docs/guide/custom-container), # much like Docker images, by iteratively chaining together commands. # -# This example also uses models from Anthropic, so if you want to run it yourself, -# you'll need to set up a Modal [`Secret`](https://modal.com/docs/guide/secrets) -# called `my-anthropic-secret` for your OpenAI API key. +# Here there's just one command, installing instructor and the Python SDK for Anthropic's LLM API. from pathlib import Path from typing import Literal, Optional import modal +from pydantic import BaseModel, Field image = modal.Image.debian_slim(python_version="3.11").pip_install( - "instructor~=1.0.0", "anthropic~=0.23.1", "matplotlib~=3.8.3" + "instructor~=1.0.0", "anthropic~=0.23.1" ) +# This example uses models from Anthropic, so if you want to run it yourself, +# you'll need to set up a Modal [`Secret`](https://modal.com/docs/guide/secrets) +# called `my-anthropic-secret` for your OpenAI API key. + app = modal.App( image=image, secrets=[modal.Secret.from_name("my-anthropic-secret")] ) # Note: prior to April 2024, "app" was called "stub" - -# ## The overall flow +# ## Running Modal functions from the command line # # We'll run the example by calling `modal run instructor_generate.py` from the command line. # @@ -64,7 +63,7 @@ @app.local_entrypoint() -def main(limit: int = 15, with_opus: bool = False): +def main(limit: int = 1, with_opus: bool = False): # find all of the examples in the repo examples = get_examples() # optionally limit the number of examples we process @@ -72,17 +71,17 @@ def main(limit: int = 15, with_opus: bool = False): examples = [None] # just run on this example else: examples = examples[:limit] - if examples: - # use Modal to map our extraction function over the examples concurrently - results = extract_example_metadata.map( - [ - f"{example.stem}\n" + Path(example.filename).read_text() - if example - else None - for example in examples - ], - kwargs={"with_opus": with_opus}, - ) + # use Modal to map our extraction function over the examples concurrently + results = extract_example_metadata.map( + ( # iterable of file contents + Path(example.filename).read_text() if example else None + for example in examples + ), + ( # iterable of filenames + example.stem if example else None for example in examples + ), + kwargs={"with_opus": with_opus}, + ) # save the results to a local file results_path = Path("/tmp") / "instructor_generate" / "results.jsonl" @@ -97,15 +96,65 @@ def main(limit: int = 15, with_opus: bool = False): f.write(result + "\n") -# ## Extracting JSON from unstructured text with `instructor` +# ## Extracting JSON from unstructured text with `instructor` and Pydantic # -# The real meat of this example is here, in the `extract_example_metadata` function. +# The real meat of this example is in this section, in the `extract_example_metadata` function and its schemas. # -# TODO: write this up -# TODO: refactor classes out of this function, explain separately +# We define a schema for the data we want the LLM to extract, using Pydantic. +# Instructor ensures that the LLM's output matches this schema. 
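# To make the mechanism concrete, the pattern looks roughly like this (an illustrative
# sketch; the real call appears further down in this file):
#
#     client = instructor.from_anthropic(Anthropic())
#     metadata = client.messages.create(
#         model="claude-3-haiku-20240307",
#         max_tokens=1024,
#         messages=[{"role": "user", "content": "Extract metadata from this example: ..."}],
#         response_model=ExampleMetadataExtraction,  # the Pydantic schema defined below
#     )  # returns a validated ExampleMetadataExtraction instance, not raw text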
+# +# We can use the type system provided by Python and Pydantic to express many useful features +# of the data we want to extract -- ranging from wide-open fields like a `str`ing-valued `summary` +# to constrained fields like `difficulty`, which can only take on value between 1 and 5. + +class ExampleMetadataExtraction(BaseModel): + """Extracted metadata about an example from the Modal examples repo.""" -@app.function(concurrency_limit=5) # watch those rate limits! + summary: str = Field(..., description="A brief summary of the example.") + has_thorough_explanation: bool = Field( + ..., + description="The example contains, in the form of inline comments with markdown formatting, a thorough explanation of what the code does.", + ) + domains: list[ + Literal[ + "artificial_intelligence", + "machine_learning", + "data_science", + "web_serving", + "parallel_computing", + ] + ] = Field(..., description="The") + difficulty: Literal[1, 2, 3, 4, 5] = Field( + ..., + description="The difficulty of the example, from 1 to 5. An example that uses only one or two basic Modal features and is understandable by a professional Python developer familiar with the basics of the relevant domains is a 1, while an example that uses many Modal features and uses advanced Python features like async generator coroutines or metaclasses is a 5.", + ) + freshness: float = Field( + ..., + description="The freshness of the example, from 0 to 1. This is relative to your knowledge cutoff. Examples are less fresh if they use older libraries and tools.", + ) + + +# That schema describes the data to be extracted by the LLM, but not all data is best extracted by an LLM. +# For example, the filename is easily determined in software. +# +# So we inject that information into the output after the LLM has done its work. That necessitates +# an additional schema, which inherits from the first. + + +class ExampleMetadata(ExampleMetadataExtraction): + """Metadata about an example from the Modal examples repo.""" + + filename: Optional[str] = Field( + ..., description="The filename of the example." + ) + + +# With these schemas in hand, it's straightforward to write the function that extracts the metadata. +# Note that we decorate it with `@app.function` to make it run on Modal. + + +@app.function(concurrency_limit=5) # watch those LLM API rate limits! def extract_example_metadata( example_contents: Optional[str] = None, filename: Optional[str] = None, @@ -113,47 +162,16 @@ def extract_example_metadata( ): import instructor from anthropic import Anthropic - from pydantic import BaseModel, Field + # if no example is provided, use the contents of this example if example_contents is None: example_contents = Path(__file__).read_text() filename = Path(__file__).name - class ExampleMetadataExtraction(BaseModel): - """Extracted metadata about an example from the Modal examples repo.""" - - summary: str = Field(..., description="A brief summary of the example.") - has_thorough_explanation: bool = Field( - ..., - description="The example contains, in the form of inline comments with markdown formatting, a thorough explanation of what the code does.", - ) - domains: list[ - Literal[ - "artificial_intelligence", - "machine_learning", - "data_science", - "web_serving", - "parallel_computing", - ] - ] = Field(..., description="The") - difficulty: Literal[1, 2, 3, 4, 5] = Field( - ..., - description="The difficulty of the example, from 1 to 5. 
An example that uses only one or two basic Modal features and is understandable by a professional Python developer familiar with the basics of the relevant domains is a 1, while an example that uses many Modal features and uses advanced Python features like async generator coroutines or metaclasses is a 5.", - ) - freshness: float = Field( - ..., - description="The freshness of the example, from 0 to 1. This is relative to your knowledge cutoff. Examples are less fresh if they use older libraries and tools.", - ) - - class ExampleMetadata(ExampleMetadataExtraction): - """Metadata about an example from the Modal examples repo.""" - - filename: str = Field(..., description="The filename of the example.") - client = instructor.from_anthropic(Anthropic()) - model = "claude-3-opus-20240229" if with_opus else "claude-3-haiku-20240307" + # add the schema as the `response_model` argument in what otherwise looks like a normal LLM API call extracted_metadata = client.messages.create( model=model, temperature=0.0, @@ -167,18 +185,19 @@ class ExampleMetadata(ExampleMetadataExtraction): ], ) + # inject the filename full_metadata = ExampleMetadata( **extracted_metadata.dict(), filename=filename ) + # return it as JSON return full_metadata.model_dump_json() # ## Addenda # # The rest of the code used in this example is not particularly interesting: -# some boilerplate matplotlib code to generate the figures, -# and a utility function to find all of the examples. +# just a utility function to find all of the examples, which we invoke in the `local_entrypoint` above. def get_examples(silent=True): @@ -195,7 +214,7 @@ def get_examples(silent=True): spec.loader.exec_module(example_utils) examples = [ example - for example in example_utils.get_examples(silent=silent) + for example in example_utils.get_examples() if example.type != 2 # filter out non-code assets ] return examples From d35dd4386b8c5248f5e83e2899f9177fa6cdb1fc Mon Sep 17 00:00:00 2001 From: Charles Frye Date: Sun, 28 Apr 2024 23:24:26 -0700 Subject: [PATCH 03/15] resurrect blender (#723) * adds a refreshed blender example * adds gif of final render --- 06_gpu_and_ml/blender/blender_video.py | 296 +++++++++++++++++++++++++ 1 file changed, 296 insertions(+) create mode 100644 06_gpu_and_ml/blender/blender_video.py diff --git a/06_gpu_and_ml/blender/blender_video.py b/06_gpu_and_ml/blender/blender_video.py new file mode 100644 index 000000000..f5cc55622 --- /dev/null +++ b/06_gpu_and_ml/blender/blender_video.py @@ -0,0 +1,296 @@ +# --- +# output-directory: "/tmp/render" +# --- +# # Render a video with Blender on many GPUs or CPUs in parallel +# +# This example shows how you can render an animated 3D scene using +# [Blender](https://www.blender.org/)'s Python interface. +# +# You can run it on CPUs to scale out on one hundred of containers +# or run it on GPUs to get higher throughput per node. +# Even with this simple scene, GPUs render 2x faster than CPUs. +# +# The final render looks something like this: +# +# ![Spinning Modal logo](https://modal-public-assets.s3.amazonaws.com/modal-blender-render.gif) +# +# ## Defining a Modal app + +import io +import math +from pathlib import Path + +import modal + +# Modal runs your Python functions for you in the cloud. +# You organize your code into apps, collections of functions that work together. + +app = modal.App("examples-blender-logo") + +# We need to define the environment each function runs in -- its container image. 
+# The block below defines a container image, starting from a basic Debian Linux image +# adding Blender's system-level dependencies +# and then installing the `bpy` package, which is Blender's Python API. + +rendering_image = ( + modal.Image.debian_slim(python_version="3.11") + .apt_install("xorg", "libxkbcommon0") # X11 (Unix GUI) dependencies + .pip_install("bpy") # Blender as a Python package +) + +# ## Rendering a single frame +# +# We define a function that renders a single frame. We'll scale this function out on Modal later. +# +# Functions in Modal are defined along with their hardware and their dependencies. +# This function can be run with GPU acceleration or without it, and we'll use a global flag in the code to switch between the two. + +WITH_GPU = True # try changing this to False to run rendering massively in parallel on CPUs! + +# We decorate the function with `@app.function` to define it as a Modal function. +# Note that in addition to defining the hardware requirements of the function, +# we also specify the container image that the function runs in (the one we defined above). + +# The details of the rendering function aren't too important for this example, +# so we abstract them out into functions defined at the end of the file. +# We draw a simple version of the Modal logo: +# two neon green rectangular prisms facing different directions. +# We include a parameter to rotate the prisms around the vertical/Z axis, +# which we'll use to animate the logo. + + +@app.function( + gpu="T4" if WITH_GPU else None, + concurrency_limit=10 + if WITH_GPU + else 100, # default limits on Modal free tier + image=rendering_image, +) +def render(angle: int = 0) -> bytes: + """ + Renders Modal's logo, two neon green rectangular prisms. + + + Args: + angle: How much to rotate the two prisms around the vertical/Z axis, in degrees. + + Returns: + The rendered frame as a PNG image. + """ + import bpy + + # clear existing objects + bpy.ops.object.select_all(action="DESELECT") + bpy.ops.object.select_by_type(type="MESH") + bpy.ops.object.delete() + + # ctx: the current Blender state, which we mutate + ctx = bpy.context + + # scene: the 3D environment we are rendering and its camera(s) + scene = ctx.scene + + # configure rendering -- CPU or GPU, resolution, etc. + # see function definition below for details + configure_rendering(ctx, WITH_GPU) + + scene.render.image_settings.file_format = "PNG" + scene.render.filepath = "output.png" + + # set background to black + black = (0, 0, 0, 1) + scene.world.node_tree.nodes["Background"].inputs[0].default_value = black + + # add the Modal logo: two neon green rectangular prisms + iridescent_material = create_iridescent_material() + + add_prism(ctx, (-1, 0, 0), 45, angle, iridescent_material) + add_prism(ctx, (3, 0, 0), -45, angle, iridescent_material) + + # set up the lighting and camera + bpy.ops.object.light_add(type="POINT", location=(5, 5, 5)) + bpy.context.object.data.energy = 10 + bpy.ops.object.camera_add(location=(7, -7, 5)) + scene.camera = bpy.context.object + ctx.object.rotation_euler = (1.1, 0, 0.785) + + # render + bpy.ops.render.render(write_still=True) + + # return the bytes to the caller + with open(scene.render.filepath, "rb") as image_file: + image_bytes = image_file.read() + + return image_bytes + + +# ### Rendering with acceleration +# +# We can configure the rendering process to use GPU acceleration with NVIDIA CUDA. 
+# We select the [Cycles rendering engine](https://www.cycles-renderer.org/), which is compatible with CUDA, +# and then activate the GPU. + + +def configure_rendering(ctx, with_gpu: bool): + # configure the rendering process + ctx.scene.render.engine = "CYCLES" + ctx.scene.render.resolution_x = 1920 + ctx.scene.render.resolution_y = 1080 + ctx.scene.render.resolution_percentage = 100 + + # add GPU acceleration if available + if with_gpu: + ctx.preferences.addons[ + "cycles" + ].preferences.compute_device_type = "CUDA" + ctx.scene.cycles.device = "GPU" + + # reload the devices to update the configuration + ctx.preferences.addons["cycles"].preferences.get_devices() + for device in ctx.preferences.addons["cycles"].preferences.devices: + device.use = True + + else: + ctx.scene.cycles.device = "CPU" + + # report rendering devices -- a nice snippet for debugging and ensuring the accelerators are being used + for dev in ctx.preferences.addons["cycles"].preferences.devices: + print( + f"ID:{dev['id']} Name:{dev['name']} Type:{dev['type']} Use:{dev['use']}" + ) + + +# ## Combining frames into a GIF +# +# Rendering 3D images is fun, and GPUs can make it faster, but rendering 3D videos is better! +# We add another function to our app, running on a different, simpler container image +# and different hardware, to combine the frames into a GIF. + +combination_image = modal.Image.debian_slim(python_version="3.11").pip_install( + "pillow==10.3.0" +) + +# The video has a few parameters, which we set here. + +FPS = 60 +FRAME_DURATION_MS = 1000 // FPS +NUM_FRAMES = 360 # drop this for faster iteration while playing around + +# The function to combine the frames into a GIF takes a sequence of byte sequences, one for each rendered frame, +# and converts them into a single sequence of bytes, the GIF. + + +@app.function(image=combination_image) +def combine( + frames_bytes: list[bytes], frame_duration: int = FRAME_DURATION_MS +) -> bytes: + print("🎞️ combining frames into a gif") + from PIL import Image + + frames = [ + Image.open(io.BytesIO(frame_bytes)) for frame_bytes in frames_bytes + ] + + gif_image = io.BytesIO() + frames[0].save( + gif_image, + format="GIF", + save_all=True, + append_images=frames[1:], + duration=frame_duration, + loop=0, + ) + + gif_image.seek(0) + + return gif_image.getvalue() + + +# ## Rendering in parallel in the cloud from the comfort of the command line +# +# With these two functions defined, we need only a few more lines to run our rendering at scale on Modal. +# +# First, we need a function that coordinates our functions to `render` frames and `combine` them. +# We decorate that function with `@app.local_entrypoint` so that we can run it with `modal run blender_video.py`. +# +# In that function, we use `render.map` to map the `render` function over a `range` of `angle`s, +# so that the logo will appear to spin in the final video. +# +# We collect the bytes from each frame into a `list` locally and then send it to `combine` with `.remote`. +# +# The bytes for the video come back to our local machine, and we write them to a file. +# +# The whole rendering process (for six seconds of 1080p 60 FPS video) takes between five and ten minutes on 10 T4 GPUs. 
+ + +@app.local_entrypoint() +def main(): + output_directory = Path("/tmp") / "render" + output_directory.mkdir(parents=True, exist_ok=True) + filename = output_directory / "output.gif" + with open(filename, "wb") as out_file: + out_file.write( + combine.remote(list(render.map(range(0, 360, 360 // NUM_FRAMES)))) + ) + print(f"Image saved to {filename}") + + +# ## Addenda +# +# The remainder of the code in this example defines the details of the render. +# It's not particularly interesting, so we put it the end of the file. + + +def add_prism(ctx, location, initial_rotation, angle, material): + """Add a prism at a given location, rotation, and angle, made of the provided material.""" + import bpy + import mathutils + + bpy.ops.mesh.primitive_cube_add(size=2, location=location) + obj = ctx.object # the newly created object + + # assign the material to the object + obj.data.materials.append(material) + + obj.scale = (1, 1, 2) # square base, 2x taller than wide + # Modal logo is rotated 45 degrees + obj.rotation_euler[1] = math.radians(initial_rotation) + + # apply initial transformations + bpy.ops.object.transform_apply(location=True, rotation=True, scale=True) + + # to "animate" the rendering, we rotate the prisms around the Z axis + angle_radians = math.radians(angle) + rotation_matrix = mathutils.Matrix.Rotation(angle_radians, 4, "Z") + obj.matrix_world = rotation_matrix @ obj.matrix_world + bpy.ops.object.transform_apply(location=True, rotation=True, scale=True) + + +def create_iridescent_material(): + import bpy + + mat = bpy.data.materials.new(name="IridescentGreen") + mat.use_nodes = True + nodes = mat.node_tree.nodes + links = mat.node_tree.links + + nodes.clear() + + output_node = nodes.new(type="ShaderNodeOutputMaterial") + emission_node = nodes.new(type="ShaderNodeEmission") + layer_weight = nodes.new(type="ShaderNodeLayerWeight") + color_ramp = nodes.new(type="ShaderNodeValToRGB") + + color_ramp.color_ramp.elements[0].color = (0, 0, 0, 1) + color_ramp.color_ramp.elements[1].color = (0, 1, 0, 1) + layer_weight.inputs["Blend"].default_value = 0.4 + + links.new(layer_weight.outputs["Fresnel"], color_ramp.inputs["Fac"]) + links.new(color_ramp.outputs["Color"], emission_node.inputs["Color"]) + + emission_node.inputs["Strength"].default_value = 5.0 + emission_node.inputs["Color"].default_value = (0.0, 1.0, 0.0, 1) + + links.new(emission_node.outputs["Emission"], output_node.inputs["Surface"]) + + return mat From f23c5c20168e5585341d39a27cf825b5117c6c4c Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 28 Apr 2024 23:53:47 -0700 Subject: [PATCH 04/15] Add detailed example for Fooocus on Modal (#721) * Add Fooocus Modal example with detailed comments and Markdown sections * edits fooocus example --------- Co-authored-by: devin-ai-integration[bot] <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Charles Frye --- misc/run_fooocus.py | 99 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 misc/run_fooocus.py diff --git a/misc/run_fooocus.py b/misc/run_fooocus.py new file mode 100644 index 000000000..904f0d530 --- /dev/null +++ b/misc/run_fooocus.py @@ -0,0 +1,99 @@ +# # Generate: Fooocus +# +# This example demonstrates how to set up and run a web server using the Modal library with Fooocus as the frontend. +# Fooocus provides a beginner-friendly interface to work with the SDXL 1.0 model for image generation tasks. 
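# (To try it yourself, you would most likely run something like `modal serve misc/run_fooocus.py`
# during development or `modal deploy misc/run_fooocus.py` for a persistent deployment, then open
# the URL Modal prints -- adjust the path to wherever you keep this file.)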
+# The script includes the setup of a Docker image, initialization of Fooocus, and launching a web server with GPU support. +# +# ## Basic setup + +import modal + +# To create an image that can run Fooocus, we start from an official NVIDIA base image and then add Python +# and a few system packages. +# +# We then download the Fooocus repository. + +image = ( + modal.Image.from_registry( + "nvidia/cuda:12.3.1-base-ubuntu22.04", add_python="3.10" + ) + .apt_install( + "software-properties-common", + "git", + "git-lfs", + "coreutils", + "aria2", + "libgl1", + "libglib2.0-0", + "curl", + "wget", + "libsm6", + "libxrender1", + "libxext6", + "ffmpeg", + ) + .run_commands("git clone https://github.com/lllyasviel/Fooocus.git") +) + +# ## Initialize Fooocus +# +# We are not limited to running shell commands and package installers in the image setup. +# We can also run Python functions by defining them in our code and passing them to the `run_function` method. +# +# This function installs Fooocus's dependencies and downloads the SDXL 1.0 model to the container image. +# +# This all happens at the time the container image is defined, so that the image is ready to run Fooocus when it is deployed. + + +def init_Fooocus(): + import os + import subprocess + + # change the working directory to the Fooocus directory and install the required Python packages from the requirements file. + os.chdir("/Fooocus") + os.system("pip install -r requirements_versions.txt") + + # change the directory to the models' checkpoints and download the SDXL 1.0 model using wget. + os.chdir("./models/checkpoints") + subprocess.run( + "wget -O juggernautXL_v8Rundiffusion.safetensors 'https://huggingface.co/lllyasviel/fav_models/resolve/main/fav/juggernautXL_v8Rundiffusion.safetensors'", + shell=True, + ) + + +GPU_CONFIG = modal.gpu.T4() +image = image.run_function(init_Fooocus, gpu=GPU_CONFIG) + +# ## Run Fooocus +# +# The `run` function is decorated with `app.function` to define it as a Modal function. +# The `web_server` decorator indicates that this function will serve a web application on the specified port. +# We increase the startup timeout to three minutes to account for the time it takes to load the model and start the server. + +app = modal.App("Fooocus", image=image) + +PORT = 8000 +MINUTES = 60 + + +@app.function(gpu=GPU_CONFIG, timeout=10 * MINUTES) +@modal.web_server(port=PORT, startup_timeout=3 * MINUTES) +def run(): + import os + import subprocess + + # change the working directory to the Fooocus directory. 
+ os.chdir("/Fooocus") + + # launch the Fooocus application using a subprocess that listens on the specified port + subprocess.Popen( + [ + "python", + "launch.py", + "--listen", + "0.0.0.0", + "--port", + str(PORT), + "--always-high-vram", + ] + ) From f87d12deca8f8e14072dd91461ad17d87546ecd8 Mon Sep 17 00:00:00 2001 From: Charles Frye Date: Mon, 29 Apr 2024 14:21:10 -0700 Subject: [PATCH 05/15] remove unnecessary nesting of instructor (#725) will add local inference version as a peer when ready --- .../llm-structured/{instructor => }/instructor_generate.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename 06_gpu_and_ml/llm-structured/{instructor => }/instructor_generate.py (100%) diff --git a/06_gpu_and_ml/llm-structured/instructor/instructor_generate.py b/06_gpu_and_ml/llm-structured/instructor_generate.py similarity index 100% rename from 06_gpu_and_ml/llm-structured/instructor/instructor_generate.py rename to 06_gpu_and_ml/llm-structured/instructor_generate.py From f3341012c93f69fdd4729809de948d8753d1d04d Mon Sep 17 00:00:00 2001 From: Charles Frye Date: Mon, 29 Apr 2024 19:15:01 -0700 Subject: [PATCH 06/15] faster renders, numbers on throughput and latency (#726) --- 06_gpu_and_ml/blender/blender_video.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/06_gpu_and_ml/blender/blender_video.py b/06_gpu_and_ml/blender/blender_video.py index f5cc55622..ee1ed85a7 100644 --- a/06_gpu_and_ml/blender/blender_video.py +++ b/06_gpu_and_ml/blender/blender_video.py @@ -137,6 +137,7 @@ def configure_rendering(ctx, with_gpu: bool): ctx.scene.render.resolution_x = 1920 ctx.scene.render.resolution_y = 1080 ctx.scene.render.resolution_percentage = 100 + ctx.scene.cycles.samples = 128 # add GPU acceleration if available if with_gpu: @@ -220,7 +221,8 @@ def combine( # # The bytes for the video come back to our local machine, and we write them to a file. # -# The whole rendering process (for six seconds of 1080p 60 FPS video) takes between five and ten minutes on 10 T4 GPUs. +# The whole rendering process (for six seconds of 1080p 60 FPS video) takes about five minutes to run on 10 T4 GPUs, +# with a per-frame latency of under 10 seconds, and about two minutes to run on 100 CPUs, with a per-frame latency of about 30 seconds. @app.local_entrypoint() From 54c379561c03fde020d43014721bd7a857fe875d Mon Sep 17 00:00:00 2001 From: Charles Frye Date: Thu, 2 May 2024 19:30:03 -0700 Subject: [PATCH 07/15] fixes newly-gated models in certain examples (#727) * remove extra line * adds instructions for handling gated model * handles gating for Mistral 7B in outlines example --- 06_gpu_and_ml/llm-serving/tgi_mixtral.py | 1 - 06_gpu_and_ml/llm-serving/vllm_inference.py | 9 +++++++++ 06_gpu_and_ml/llm-structured/outlines_generate.py | 14 ++++++++++++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/06_gpu_and_ml/llm-serving/tgi_mixtral.py b/06_gpu_and_ml/llm-serving/tgi_mixtral.py index c4313043c..5ca7da284 100644 --- a/06_gpu_and_ml/llm-serving/tgi_mixtral.py +++ b/06_gpu_and_ml/llm-serving/tgi_mixtral.py @@ -48,7 +48,6 @@ # We can use the included utilities to download the model weights (and convert to safetensors, if necessary) # as part of the image build. # -# # For this step to work on a [gated model](https://huggingface.co/docs/text-generation-inference/en/basic_tutorials/gated_model_access) # like Mixtral 8x7B, the `HF_TOKEN` environment variable must be set. 
# diff --git a/06_gpu_and_ml/llm-serving/vllm_inference.py b/06_gpu_and_ml/llm-serving/vllm_inference.py index 9f6b23a6a..3f67aa908 100644 --- a/06_gpu_and_ml/llm-serving/vllm_inference.py +++ b/06_gpu_and_ml/llm-serving/vllm_inference.py @@ -36,6 +36,13 @@ # ### Download the weights # We can download the model to a particular directory using the HuggingFace utility function `snapshot_download`. # +# For this step to work on a [gated model](https://huggingface.co/docs/hub/en/models-gated) +# like Mistral 7B, the `HF_TOKEN` environment variable must be set. +# +# After [creating a HuggingFace access token](https://huggingface.co/settings/tokens) +# and accepting the [terms of use](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1), +# head to the [secrets page](https://modal.com/secrets) to share it with Modal as `huggingface-secret`. +# # Tip: avoid using global variables in this function. # Changes to code outside this function will not be detected, and the download step will not re-run. def download_model_to_image(model_dir, model_name): @@ -48,6 +55,7 @@ def download_model_to_image(model_dir, model_name): model_name, local_dir=model_dir, ignore_patterns=["*.pt", "*.bin"], # Using safetensors + token=os.environ["HF_TOKEN"], ) move_cache() @@ -71,6 +79,7 @@ def download_model_to_image(model_dir, model_name): download_model_to_image, timeout=60 * 20, kwargs={"model_dir": MODEL_DIR, "model_name": MODEL_NAME}, + secrets=[modal.Secret.from_name("huggingface-secret")], ) ) diff --git a/06_gpu_and_ml/llm-structured/outlines_generate.py b/06_gpu_and_ml/llm-structured/outlines_generate.py index 19e7ae763..b54acadbf 100644 --- a/06_gpu_and_ml/llm-structured/outlines_generate.py +++ b/06_gpu_and_ml/llm-structured/outlines_generate.py @@ -24,7 +24,7 @@ # First, you'll want to build an image and install the relevant Python dependencies: # `outlines` and a Hugging Face inference stack. -from modal import App, Image, gpu +from modal import App, Image, Secret, gpu app = App( name="outlines-app" @@ -42,6 +42,13 @@ # Next, we download the Mistral-7B model from Hugging Face. # We do this as part of the definition of our Modal image so that # we don't need to download it every time our inference function is run. +# +# For this step to work on a [gated model](https://huggingface.co/docs/hub/en/models-gated) +# like Mistral 7B, the `HF_TOKEN` environment variable must be set. +# +# After [creating a HuggingFace access token](https://huggingface.co/settings/tokens) +# and accepting the [terms of use](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1), +# head to the [secrets page](https://modal.com/secrets) to share it with Modal as `huggingface-secret`. 
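# (If you prefer the CLI to the web UI, you can likely create the same Secret with a command along
# the lines of `modal secret create huggingface-secret HF_TOKEN=hf_...`; check
# `modal secret create --help` for the exact syntax in your client version.)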
def import_model(): @@ -50,7 +57,10 @@ def import_model(): outlines.models.transformers("mistralai/Mistral-7B-v0.1") -outlines_image = outlines_image.run_function(import_model) +outlines_image = outlines_image.run_function( + import_model, + secrets=[Secret.from_name("huggingface-secret")], +) # ## Define the schema From ad9346a7bf38272470ce20e1a3c6d4f578b2cd2c Mon Sep 17 00:00:00 2001 From: Charles Frye Date: Thu, 2 May 2024 20:09:25 -0700 Subject: [PATCH 08/15] fixes relative path between instructor_generate and utils (#728) --- 06_gpu_and_ml/llm-structured/instructor_generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/06_gpu_and_ml/llm-structured/instructor_generate.py b/06_gpu_and_ml/llm-structured/instructor_generate.py index 242f419d3..57ba0ed0d 100644 --- a/06_gpu_and_ml/llm-structured/instructor_generate.py +++ b/06_gpu_and_ml/llm-structured/instructor_generate.py @@ -206,7 +206,7 @@ def get_examples(silent=True): We use importlib to avoid the need to define the repo as a package.""" import importlib - examples_root = Path(__file__).parent.parent.parent.parent + examples_root = Path(__file__).parent.parent.parent spec = importlib.util.spec_from_file_location( "utils", f"{examples_root}/internal/utils.py" ) From 8ff22cc373be59f9331f55c7aff799a41b7c0360 Mon Sep 17 00:00:00 2001 From: Charles Frye Date: Thu, 2 May 2024 22:34:50 -0700 Subject: [PATCH 09/15] centers logo, positions prisms, nicer material, match CPU + GPU throughput (#729) --- 06_gpu_and_ml/blender/blender_video.py | 63 +++++++++++++++++++++----- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/06_gpu_and_ml/blender/blender_video.py b/06_gpu_and_ml/blender/blender_video.py index ee1ed85a7..c07547691 100644 --- a/06_gpu_and_ml/blender/blender_video.py +++ b/06_gpu_and_ml/blender/blender_video.py @@ -6,9 +6,9 @@ # This example shows how you can render an animated 3D scene using # [Blender](https://www.blender.org/)'s Python interface. # -# You can run it on CPUs to scale out on one hundred of containers +# You can run it on CPUs to scale out on one hundred containers # or run it on GPUs to get higher throughput per node. -# Even with this simple scene, GPUs render 2x faster than CPUs. +# Even with this simple scene, GPUs render 10x faster than CPUs. 
# # The final render looks something like this: # @@ -60,7 +60,7 @@ @app.function( - gpu="T4" if WITH_GPU else None, + gpu="A10G" if WITH_GPU else None, concurrency_limit=10 if WITH_GPU else 100, # default limits on Modal free tier @@ -104,12 +104,33 @@ def render(angle: int = 0) -> bytes: # add the Modal logo: two neon green rectangular prisms iridescent_material = create_iridescent_material() - add_prism(ctx, (-1, 0, 0), 45, angle, iridescent_material) - add_prism(ctx, (3, 0, 0), -45, angle, iridescent_material) + add_prism(ctx, (-2.07, -1, 0), 45, angle, iridescent_material) + add_prism(ctx, (2.07, 1, 0), -45, angle, iridescent_material) - # set up the lighting and camera + # set up the lighting + # warm key light bpy.ops.object.light_add(type="POINT", location=(5, 5, 5)) - bpy.context.object.data.energy = 10 + key_light = bpy.context.object + key_light.data.energy = 100 + key_light.data.color = (1, 0.8, 0.5) # warm + + # tight, cool spotlight + bpy.ops.object.light_add(type="SPOT", radius=1, location=(4, 0, 6)) + spot_light = bpy.context.object + spot_light.data.energy = 500 + spot_light.data.spot_size = 0.5 + spot_light.data.color = (0.8, 0.8, 1) # cool + spot_light.rotation_euler = (3.14 / 4, 0, -3.14 / 4) + + # soft overall illumination + bpy.ops.object.light_add(type="AREA", radius=3, location=(-3, 3, 5)) + area_light = bpy.context.object + area_light.data.energy = 50 # softer + area_light.data.size = 5 # larger + area_light.data.color = (1, 1, 1) # neutral + area_light.rotation_euler = (3.14 / 2, 0, 3.14) + + # add camera bpy.ops.object.camera_add(location=(7, -7, 5)) scene.camera = bpy.context.object ctx.object.rotation_euler = (1.1, 0, 0.785) @@ -221,8 +242,8 @@ def combine( # # The bytes for the video come back to our local machine, and we write them to a file. # -# The whole rendering process (for six seconds of 1080p 60 FPS video) takes about five minutes to run on 10 T4 GPUs, -# with a per-frame latency of under 10 seconds, and about two minutes to run on 100 CPUs, with a per-frame latency of about 30 seconds. +# The whole rendering process (for six seconds of 1080p 60 FPS video) takes about five minutes to run on 10 A10G GPUs, +# with a per-frame latency of about 10 seconds, and about five minutes to run on 100 CPUs, with a per-frame latency of about one minute. 
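# (Back-of-the-envelope: 360 frames at ~10 s per frame spread across 10 GPUs is roughly 360 s of
# pure rendering, and 360 frames at ~60 s per frame across 100 CPUs is roughly 216 s, so most of
# the quoted wall-clock time is render work, with the remainder going to container startup and the
# GIF-combination step.)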
@app.local_entrypoint() @@ -251,6 +272,11 @@ def add_prism(ctx, location, initial_rotation, angle, material): bpy.ops.mesh.primitive_cube_add(size=2, location=location) obj = ctx.object # the newly created object + bevel = obj.modifiers.new(name="Bevel", type="BEVEL") + bevel.width = 0.2 + bevel.segments = 5 + bevel.profile = 1.0 + # assign the material to the object obj.data.materials.append(material) @@ -278,13 +304,22 @@ def create_iridescent_material(): nodes.clear() - output_node = nodes.new(type="ShaderNodeOutputMaterial") + principled_node = nodes.new(type="ShaderNodeBsdfPrincipled") + emission_node = nodes.new(type="ShaderNodeEmission") layer_weight = nodes.new(type="ShaderNodeLayerWeight") color_ramp = nodes.new(type="ShaderNodeValToRGB") + mix_shader_node = nodes.new(type="ShaderNodeMixShader") + + output_node = nodes.new(type="ShaderNodeOutputMaterial") + + principled_node.inputs["Base Color"].default_value = (1, 1, 1, 1) + principled_node.inputs["Metallic"].default_value = 1.0 + principled_node.inputs["Roughness"].default_value = 0.5 + color_ramp.color_ramp.elements[0].color = (0, 0, 0, 1) - color_ramp.color_ramp.elements[1].color = (0, 1, 0, 1) + color_ramp.color_ramp.elements[1].color = (0, 0.5, 0, 1) layer_weight.inputs["Blend"].default_value = 0.4 links.new(layer_weight.outputs["Fresnel"], color_ramp.inputs["Fac"]) @@ -293,6 +328,10 @@ def create_iridescent_material(): emission_node.inputs["Strength"].default_value = 5.0 emission_node.inputs["Color"].default_value = (0.0, 1.0, 0.0, 1) - links.new(emission_node.outputs["Emission"], output_node.inputs["Surface"]) + links.new(emission_node.outputs["Emission"], mix_shader_node.inputs[1]) + links.new(principled_node.outputs["BSDF"], mix_shader_node.inputs[2]) + links.new(layer_weight.outputs["Fresnel"], mix_shader_node.inputs["Fac"]) + + links.new(mix_shader_node.outputs["Shader"], output_node.inputs["Surface"]) return mat From 03c44cb42a7440fc31ef00631f1a0cf0589161bb Mon Sep 17 00:00:00 2001 From: Charles Frye Date: Fri, 3 May 2024 10:19:43 -0700 Subject: [PATCH 10/15] refactors lighting out of main render function (#730) --- 06_gpu_and_ml/blender/blender_video.py | 52 ++++++++++++++------------ 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/06_gpu_and_ml/blender/blender_video.py b/06_gpu_and_ml/blender/blender_video.py index c07547691..c58c4cbf4 100644 --- a/06_gpu_and_ml/blender/blender_video.py +++ b/06_gpu_and_ml/blender/blender_video.py @@ -107,30 +107,8 @@ def render(angle: int = 0) -> bytes: add_prism(ctx, (-2.07, -1, 0), 45, angle, iridescent_material) add_prism(ctx, (2.07, 1, 0), -45, angle, iridescent_material) - # set up the lighting - # warm key light - bpy.ops.object.light_add(type="POINT", location=(5, 5, 5)) - key_light = bpy.context.object - key_light.data.energy = 100 - key_light.data.color = (1, 0.8, 0.5) # warm - - # tight, cool spotlight - bpy.ops.object.light_add(type="SPOT", radius=1, location=(4, 0, 6)) - spot_light = bpy.context.object - spot_light.data.energy = 500 - spot_light.data.spot_size = 0.5 - spot_light.data.color = (0.8, 0.8, 1) # cool - spot_light.rotation_euler = (3.14 / 4, 0, -3.14 / 4) - - # soft overall illumination - bpy.ops.object.light_add(type="AREA", radius=3, location=(-3, 3, 5)) - area_light = bpy.context.object - area_light.data.energy = 50 # softer - area_light.data.size = 5 # larger - area_light.data.color = (1, 1, 1) # neutral - area_light.rotation_euler = (3.14 / 2, 0, 3.14) - - # add camera + # add lighting and camera + add_lighting() 
bpy.ops.object.camera_add(location=(7, -7, 5)) scene.camera = bpy.context.object ctx.object.rotation_euler = (1.1, 0, 0.785) @@ -335,3 +313,29 @@ def create_iridescent_material(): links.new(mix_shader_node.outputs["Shader"], output_node.inputs["Surface"]) return mat + + +def add_lighting(): + import bpy + + # warm key light + bpy.ops.object.light_add(type="POINT", location=(5, 5, 5)) + key_light = bpy.context.object + key_light.data.energy = 100 + key_light.data.color = (1, 0.8, 0.5) # warm + + # tight, cool spotlight + bpy.ops.object.light_add(type="SPOT", radius=1, location=(4, 0, 6)) + spot_light = bpy.context.object + spot_light.data.energy = 500 + spot_light.data.spot_size = 0.5 + spot_light.data.color = (0.8, 0.8, 1) # cool + spot_light.rotation_euler = (3.14 / 4, 0, -3.14 / 4) + + # soft overall illumination + bpy.ops.object.light_add(type="AREA", radius=3, location=(-3, 3, 5)) + area_light = bpy.context.object + area_light.data.energy = 50 # softer + area_light.data.size = 5 # larger + area_light.data.color = (1, 1, 1) # neutral + area_light.rotation_euler = (3.14 / 2, 0, 3.14) From 5923bff5ab734633ae06b6ab4493838014794d06 Mon Sep 17 00:00:00 2001 From: Charles Frye Date: Fri, 3 May 2024 10:36:41 -0700 Subject: [PATCH 11/15] adds rate limit handler from slack SDK (#731) --- 10_integrations/webscraper.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/10_integrations/webscraper.py b/10_integrations/webscraper.py index e85135b08..d817b323e 100644 --- a/10_integrations/webscraper.py +++ b/10_integrations/webscraper.py @@ -39,7 +39,9 @@ async def get_links(url: str) -> set[str]: return set(links) -slack_sdk_image = modal.Image.debian_slim().pip_install("slack-sdk") +slack_sdk_image = modal.Image.debian_slim(python_version="3.10").pip_install( + "slack-sdk==3.27.1" +) @app.function( @@ -48,9 +50,13 @@ async def get_links(url: str) -> set[str]: ) def bot_token_msg(channel, message): import slack_sdk + from slack_sdk.http_retry.builtin_handlers import RateLimitErrorRetryHandler - print(f"Posting {message} to #{channel}") client = slack_sdk.WebClient(token=os.environ["SLACK_BOT_TOKEN"]) + rate_limit_handler = RateLimitErrorRetryHandler(max_retry_count=3) + client.retry_handlers.append(rate_limit_handler) + + print(f"Posting {message} to #{channel}") client.chat_postMessage(channel=channel, text=message) From e0b46deb9889d25832fb392307e9fdccb52d3528 Mon Sep 17 00:00:00 2001 From: Talha SARI Date: Sun, 5 May 2024 04:00:56 +0300 Subject: [PATCH 12/15] Fix whisper streaming (#733) * change endpoint name to transcribe to match example usage * add remote method to modal function usage * use aio to convert synch map into asynch * minor fix * change sleep to 0, fixed the curl giving error otherwise * correct old typo --------- Co-authored-by: Charles Frye --- 06_gpu_and_ml/openai_whisper/streaming/main.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/06_gpu_and_ml/openai_whisper/streaming/main.py b/06_gpu_and_ml/openai_whisper/streaming/main.py index cc8ae23b3..676d2b485 100644 --- a/06_gpu_and_ml/openai_whisper/streaming/main.py +++ b/06_gpu_and_ml/openai_whisper/streaming/main.py @@ -183,16 +183,16 @@ async def stream_whisper(audio_data: bytes): f.flush() segment_gen = split_silences(f.name) - for result in transcribe_segment.starmap( + async for result in transcribe_segment.starmap( segment_gen, kwargs=dict(audio_data=audio_data, model="base.en") ): - # Must cooperatively yeild here otherwise `StreamingResponse` will not iteratively 
return stream parts. - # see: https://github.com/python/asyncio/issues/284 - await asyncio.sleep(0.5) + # Must cooperatively yield here otherwise `StreamingResponse` will not iteratively return stream parts. + # see: https://github.com/python/asyncio/issues/284#issuecomment-154162668 + await asyncio.sleep(0) yield result["text"] -@web_app.get("/") +@web_app.get("/transcribe") async def transcribe(url: str): """ Usage: @@ -213,7 +213,7 @@ async def transcribe(url: str): print(f"downloading {url}") try: - audio_data = download_mp3_from_youtube(url) + audio_data = download_mp3_from_youtube.remote(url) except pytube.exceptions.RegexMatchError: raise HTTPException( status_code=422, detail=f"Could not process url {url}" From a238c9758583ccaeccdcbc217dddee75651cf26e Mon Sep 17 00:00:00 2001 From: bofeng huang Date: Sun, 5 May 2024 03:08:48 +0200 Subject: [PATCH 13/15] Fix vLLM template (#734) * Update vllm_mixtral.py * Fix template * Fix template --- 06_gpu_and_ml/llm-serving/vllm_gemma.py | 2 +- 06_gpu_and_ml/llm-serving/vllm_inference.py | 4 ++-- 06_gpu_and_ml/llm-serving/vllm_mixtral.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/06_gpu_and_ml/llm-serving/vllm_gemma.py b/06_gpu_and_ml/llm-serving/vllm_gemma.py index 2a3545961..634c6d47a 100644 --- a/06_gpu_and_ml/llm-serving/vllm_gemma.py +++ b/06_gpu_and_ml/llm-serving/vllm_gemma.py @@ -121,7 +121,7 @@ class Model: @modal.enter() def load(self): self.template = ( - "start_of_turn>user\n{user}\nmodel" + "user\n{user}\nmodel\n" ) # Load the model. Tip: Some models, like MPT, may require `trust_remote_code=true`. diff --git a/06_gpu_and_ml/llm-serving/vllm_inference.py b/06_gpu_and_ml/llm-serving/vllm_inference.py index 3f67aa908..c24e345db 100644 --- a/06_gpu_and_ml/llm-serving/vllm_inference.py +++ b/06_gpu_and_ml/llm-serving/vllm_inference.py @@ -109,11 +109,11 @@ class Model: def load_model(self): # Tip: models that are not fully implemented by Hugging Face may require `trust_remote_code=true`. self.llm = vllm.LLM(MODEL_DIR, tensor_parallel_size=GPU_CONFIG.count) - self.template = """[INST] <> + self.template = """[INST] <> {system} <> -{user} [/INST] """ +{user} [/INST]""" @modal.method() def generate(self, user_questions): diff --git a/06_gpu_and_ml/llm-serving/vllm_mixtral.py b/06_gpu_and_ml/llm-serving/vllm_mixtral.py index 57618ae28..eb236b9cb 100644 --- a/06_gpu_and_ml/llm-serving/vllm_mixtral.py +++ b/06_gpu_and_ml/llm-serving/vllm_mixtral.py @@ -121,7 +121,7 @@ def start_engine(self): disable_log_stats=True, # disable logging so we can stream tokens disable_log_requests=True, ) - self.template = " [INST] {user} [/INST] " + self.template = "[INST] {user} [/INST]" # this can take some time! 
self.engine = AsyncLLMEngine.from_engine_args(engine_args) From 2ac53ebc35b38e30d2288efb3cecaf41f19c8733 Mon Sep 17 00:00:00 2001 From: Akshat Bubna Date: Mon, 6 May 2024 00:10:06 -0400 Subject: [PATCH 14/15] install numpy explicitly in wikipedia example (#736) --- 06_gpu_and_ml/embeddings/wikipedia/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/06_gpu_and_ml/embeddings/wikipedia/main.py b/06_gpu_and_ml/embeddings/wikipedia/main.py index 95d898c22..0c3ffb5cc 100644 --- a/06_gpu_and_ml/embeddings/wikipedia/main.py +++ b/06_gpu_and_ml/embeddings/wikipedia/main.py @@ -78,7 +78,7 @@ def spawn_server() -> subprocess.Popen: add_python="3.10", ) .dockerfile_commands("ENTRYPOINT []") - .pip_install("httpx") + .pip_install("httpx", "numpy") ) with tei_image.imports(): From 75d6c997ecedb953e81b5b00cadf677eb96aed9f Mon Sep 17 00:00:00 2001 From: Charles Frye Date: Mon, 6 May 2024 10:44:15 -0700 Subject: [PATCH 15/15] Run examples on change (#735) * inital draft of action to test monitoring * cleans up monitoring workflow * more complete draft of monitoring test action * removes draft monitoring workflow, reorganizes existing workflows * update internal development requirements * turn off dry run now that we're going back to prod * reorganize environment setup * WIP version of example execution * adds .secrets file from act * handles modal serve, proper system exit, drops extra script * updates actions, better environment setup * handle PRs with no changed files * add back dev dependencies for jupytext and pydantic in deploy * reverts changes to typechecking to avoid slowdown --- .github/actions/setup/action.yml | 36 ++++++++++++++ .github/workflows/cd.yml | 8 +--- .github/workflows/check.yml | 27 +++-------- .github/workflows/run-examples.yml | 76 ++++++++++++++++++++++++++++++ .gitignore | 3 ++ internal/requirements.txt | 7 ++- internal/run_example.py | 50 ++++++++++++++++++++ 7 files changed, 179 insertions(+), 28 deletions(-) create mode 100644 .github/actions/setup/action.yml create mode 100644 .github/workflows/run-examples.yml create mode 100644 internal/run_example.py diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml new file mode 100644 index 000000000..0312efef1 --- /dev/null +++ b/.github/actions/setup/action.yml @@ -0,0 +1,36 @@ +name: setup + +description: Set up a Python environment for the examples. 
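# This composite action is consumed by the workflows further down in this patch
# (cd.yml, check.yml, and run-examples.yml) via `uses: ./.github/actions/setup`.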
+ +inputs: + version: + description: Which Python version to install + required: false + default: "3.11" + devDependencies: + description: Whether to skip dependencies + required: false + default: "no-skip" + +runs: + using: composite + steps: + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.version }} + + - name: Install base packages + shell: bash + run: | + pip install uv + uv pip install --system setuptools wheel + + - name: Install development Python packages + if: ${{ inputs.devDependencies != 'skip' }} + shell: bash + run: uv pip install --system -r internal/requirements.txt + + - name: Install the modal client + shell: bash + run: uv pip install --system modal diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 50ab209a0..451c08f46 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -17,13 +17,9 @@ jobs: steps: - uses: actions/checkout@v3 - - - uses: actions/setup-python@v4 with: - python-version: "3.9" - - - name: Install Modal client package and jupytext - run: pip install modal-client jupytext pydantic~=1.10 + fetch-depth: 1 + - uses: ./.github/actions/setup - name: Run deployment script run: | diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 389875d8d..9f058e4c0 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -13,13 +13,9 @@ jobs: steps: - uses: actions/checkout@v3 - - - uses: actions/setup-python@v4 with: - python-version: "3.11" - - # keep version here in sync with .pre-commit-config.yaml and other modal repos - - run: pip install ruff==0.2.1 + fetch-depth: 1 + - uses: ./.github/actions/setup - run: ruff check @@ -31,16 +27,14 @@ jobs: steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 with: - python-version: "3.11" - - name: Install NbConvert - run: pip install jupyter nbconvert + fetch-depth: 1 + - uses: ./.github/actions/setup - name: Check notebooks are cleaned run: | jupyter nbconvert --clear-output --inplace 11_notebooks/*.ipynb - git diff --quiet && git diff --cached --quiet || exit 1 + git diff --quiet 11_notebooks/*.ipynb && git diff --cached --quiet 11_notebooks/*.ipynb || exit 1 pytest: name: Pytest @@ -48,16 +42,9 @@ jobs: steps: - uses: actions/checkout@v3 - - - uses: actions/setup-python@v4 with: - python-version: "3.11" - - - name: Install dev dependencies - run: pip install pytest jupytext pydantic~=1.10 - - - name: Install the Modal client - run: pip install modal-client + fetch-depth: 1 + - uses: ./.github/actions/setup - name: Run run: pytest -v . 
diff --git a/.github/workflows/run-examples.yml b/.github/workflows/run-examples.yml new file mode 100644 index 000000000..bf27d0adb --- /dev/null +++ b/.github/workflows/run-examples.yml @@ -0,0 +1,76 @@ +name: Run + +on: + pull_request: + branches: + - main + paths: + - "**.py" + push: + branches: + - main + paths: + - "**.py" + workflow_dispatch: + +# Cancel previous runs of the same PR but do not cancel previous runs on main +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +env: + TERM: linux + TERMINFO: /etc/terminfo + MODAL_TOKEN_ID: ${{ secrets.MODAL_MODAL_LABS_TOKEN_ID }} + MODAL_TOKEN_SECRET: ${{ secrets.MODAL_MODAL_LABS_TOKEN_SECRET }} + MODAL_ENVIRONMENT: main + +jobs: + # Output all changed files in a JSON format compatible with GitHub Actions job matrices + diff-matrix: + name: Generate matrix of changed examples + runs-on: ubuntu-20.04 + outputs: + matrix: ${{ steps.diff.outputs.all_changed_files }} + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Find changed examples + id: diff + uses: tj-actions/changed-files@v44 + with: + files: "**.py" + files_ignore: "internal/**,misc/**" + matrix: true + + - name: List all changed examples + run: echo '${{ steps.diff.outputs.all_changed_files }}' + + # Run each changed example, using the output of the previous step as a job matrix + run-changed: + name: Run changed example + needs: [diff-matrix] + if: + ${{ needs.diff-matrix.outputs.matrix != '[]' && + needs.diff-matrix.outputs.matrix != '' }} + runs-on: ubuntu-20.04 + strategy: + matrix: + file: ${{ fromJson(needs.diff-matrix.outputs.matrix) }} + fail-fast: false + + steps: + - name: Checkout Repository + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - uses: ./.github/actions/setup + + - name: Run example + run: | + echo "Running ${{ matrix.file }}" + stem=$(basename "${{ matrix.file }}" .py) + python3 -m internal.run_example $stem || exit $? diff --git a/.gitignore b/.gitignore index 53fe8b69e..3218fc050 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ venv .venv + +# secrets file for act, tool for local GitHub Actions testing +.secrets diff --git a/internal/requirements.txt b/internal/requirements.txt index 42bf85702..5c5120ec8 100644 --- a/internal/requirements.txt +++ b/internal/requirements.txt @@ -1,5 +1,8 @@ -modal pytest +jupyter +ipython +nbconvert jupytext~=1.16.1 pydantic~=1.10.14 -mypy==0.950 +mypy==1.2.0 +ruff==0.2.1 diff --git a/internal/run_example.py b/internal/run_example.py new file mode 100644 index 000000000..3b06a3cb0 --- /dev/null +++ b/internal/run_example.py @@ -0,0 +1,50 @@ +import os +import subprocess +import sys +import time + +from . 
import utils + +MINUTES = 60 +TIMEOUT = 12 * MINUTES + + +def run_script(example): + t0 = time.time() + + try: + print(f"cli args: {example.cli_args}") + process = subprocess.run( + example.cli_args, + env=os.environ | {"MODAL_SERVE_TIMEOUT": "5.0"}, + timeout=TIMEOUT, + ) + total_time = time.time() - t0 + if process.returncode == 0: + print(f"Success after {total_time:.2f}s :)") + else: + print( + f"Failed after {total_time:.2f}s with return code {process.returncode} :(" + ) + + returncode = process.returncode + + except subprocess.TimeoutExpired: + print(f"Past timeout of {TIMEOUT}s :(") + returncode = 999 + + return returncode + + +def run_single_example(stem): + examples = utils.get_examples() + for example in examples: + if stem == example.stem: + return run_script(example) + else: + print(f"Could not find example name {stem}") + return 0 + + +if __name__ == "__main__": + sys.exit(run_single_example(sys.argv[1]))
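# Example invocation, mirroring the "Run example" step in run-examples.yml above:
#
#   python3 -m internal.run_example vllm_inference
#
# where the argument is the stem (filename without .py) of the example to run.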