From d8aa41c1dd94b3ce3ddbdd2db30f75c3d90959dd Mon Sep 17 00:00:00 2001 From: fm1320 Date: Mon, 13 Jan 2025 21:07:11 +0000 Subject: [PATCH] Simplify nested ifs and add few more test examples --- .../components/model_client/openai_client.py | 32 ++- docs/source/tutorials/generator.rst | 221 ++++++++++++++++++ docs/source/tutorials/model_client.rst | 105 --------- .../multimodal_client_testing_examples.py | 119 ++++++++++ 4 files changed, 354 insertions(+), 123 deletions(-) create mode 100644 tutorials/multimodal_client_testing_examples.py diff --git a/adalflow/adalflow/components/model_client/openai_client.py b/adalflow/adalflow/components/model_client/openai_client.py index a81f8287..c3750667 100644 --- a/adalflow/adalflow/components/model_client/openai_client.py +++ b/adalflow/adalflow/components/model_client/openai_client.py @@ -305,27 +305,23 @@ def convert_inputs_to_api_kwargs( elif model_type == ModelType.IMAGE_GENERATION: # For image generation, input is the prompt final_model_kwargs["prompt"] = input - # Set defaults for DALL-E 3 if not specified + # Ensure model is specified if "model" not in final_model_kwargs: - final_model_kwargs["model"] = "dall-e-3" - if "size" not in final_model_kwargs: - final_model_kwargs["size"] = "1024x1024" - if "quality" not in final_model_kwargs: - final_model_kwargs["quality"] = "standard" - if "n" not in final_model_kwargs: - final_model_kwargs["n"] = 1 - if "response_format" not in final_model_kwargs: - final_model_kwargs["response_format"] = "url" + raise ValueError("model must be specified for image generation") + # Set defaults for DALL-E 3 if not specified + final_model_kwargs["size"] = final_model_kwargs.get("size", "1024x1024") + final_model_kwargs["quality"] = final_model_kwargs.get("quality", "standard") + final_model_kwargs["n"] = final_model_kwargs.get("n", 1) + final_model_kwargs["response_format"] = final_model_kwargs.get("response_format", "url") # Handle image edits and variations - if "image" in 
final_model_kwargs: - if isinstance(final_model_kwargs["image"], str): - # If it's a file path, encode it - if os.path.isfile(final_model_kwargs["image"]): - final_model_kwargs["image"] = self._encode_image(final_model_kwargs["image"]) - if "mask" in final_model_kwargs and isinstance(final_model_kwargs["mask"], str): - if os.path.isfile(final_model_kwargs["mask"]): - final_model_kwargs["mask"] = self._encode_image(final_model_kwargs["mask"]) + image = final_model_kwargs.get("image") + if isinstance(image, str) and os.path.isfile(image): + final_model_kwargs["image"] = self._encode_image(image) + + mask = final_model_kwargs.get("mask") + if isinstance(mask, str) and os.path.isfile(mask): + final_model_kwargs["mask"] = self._encode_image(mask) else: raise ValueError(f"model_type {model_type} is not supported") return final_model_kwargs diff --git a/docs/source/tutorials/generator.rst b/docs/source/tutorials/generator.rst index 214886cb..2b406fda 100644 --- a/docs/source/tutorials/generator.rst +++ b/docs/source/tutorials/generator.rst @@ -106,6 +106,161 @@ In particular, we created :class:`GeneratorOutput` t Whether to do further processing or terminate the pipeline whenever an error occurs is up to the user from here on. +Basic Generator Tutorial +======================== + +The Generator class is the core component in AdalFlow for interacting with AI models. This tutorial covers the essential concepts and patterns. + +What is a Generator? +-------------------- + +A Generator is a unified interface for model interactions that: + +1. Takes input and formats it using a prompt template +2. Sends the formatted input to an AI model +3. Returns a standardized ``GeneratorOutput`` object + +Basic Usage +----------- + +Here's the simplest way to use a Generator: + +..
code-block:: python + + from adalflow.core import Generator + from adalflow.components.model_client.openai_client import OpenAIClient + + # Create a generator + gen = Generator( + model_client=OpenAIClient(), + model_kwargs={ + "model": "gpt-4o-mini", + "temperature": 0.7 + } + ) + + # Use the generator + response = gen({"input_str": "What is the capital of France?"}) + print(response.raw_response) + +Understanding the Output +------------------------ + +Every Generator call returns a ``GeneratorOutput`` object: + +.. code-block:: python + + response = gen({"input_str": "Hello"}) + + # Access different parts of the response + print(response.raw_response) # Raw model output + print(response.data) # Processed data (if using output processors) + print(response.error) # Error message if something went wrong + print(response.usage) # Token usage information + +When to Create a Subclass +------------------------- + +You should create a Generator subclass in two main cases: + +1. **Different Model Types**: When using non-LLM endpoints + + .. code-block:: python + + class ImageGenerator(Generator): + """For DALL-E and other image generation models""" + model_type = ModelType.IMAGE_GENERATION + +2. **Custom Processing**: When you need special input/output handling + + .. code-block:: python + + class CustomGenerator(Generator): + def _pre_call(self, prompt_kwargs, model_kwargs): + # Custom preprocessing + return super()._pre_call(prompt_kwargs, model_kwargs) + +When NOT to Subclass +-------------------- + +Don't create a subclass for: + +1. **Model Parameters**: Use ``model_kwargs`` instead + + .. code-block:: python + + # Just pass parameters directly + gen = Generator( + model_client=client, + model_kwargs={ + "model": "gpt-4o-mini", + "temperature": 0.9 + } + ) + +2. **Output Processing**: Use output processors + + ..
code-block:: python + + from adalflow.core.string_parser import JsonParser + + gen = Generator( + model_client=client, + output_processors=JsonParser() # Process output as JSON + ) + +Common Patterns +--------------- + +1. **Error Handling**: + + .. code-block:: python + + response = gen({"input_str": "Query"}) + if response.error: + print(f"Error: {response.error}") + else: + print(response.raw_response) + +2. **Async Usage**: + + .. code-block:: python + + async def generate(): + response = await gen.acall({"input_str": "Hello"}) + print(response.raw_response) + +3. **Streaming**: + + .. code-block:: python + + gen = Generator( + model_client=client, + model_kwargs={"stream": True} + ) + for chunk in gen({"input_str": "Tell me a story"}): + print(chunk) + +Model Types +----------- + +Generator supports different model types through ``ModelType``: + +- ``ModelType.LLM``: Text generation (default) +- ``ModelType.IMAGE_GENERATION``: Image generation (DALL-E) +- ``ModelType.EMBEDDER``: Text embeddings +- ``ModelType.RERANKER``: Document reranking + +Best Practices +-------------- + +1. Always check for errors in the response +2. Use output processors for structured outputs +3. Set model parameters in ``model_kwargs`` +4. Use async methods for better performance in async contexts +5. Use streaming for long responses + +Remember: The Generator is designed to provide a consistent interface regardless of the underlying model or task. Generator In Action --------------------------------------- @@ -480,6 +635,72 @@ It will require users to define ``Parameter`` and pass it to the ``prompt_kwargs .. If you change the LLM, you may need to update this tokenizer to ensure accurate token counts, chunking, and prompting. +Image Generation +------------------------------------------------- + +The Generator class also supports image generation through DALL-E models. First, you need to define a Generator subclass with the correct model type: + +..
code-block:: python + + from adalflow import Generator + from adalflow.core.types import ModelType + + class ImageGenerator(Generator): + """Generator subclass for image generation.""" + model_type = ModelType.IMAGE_GENERATION + +Then you can use it like this: + +.. code-block:: python + + from adalflow import OpenAIClient + + generator = ImageGenerator( + model_client=OpenAIClient(), + model_kwargs={ + "model": "dall-e-3", # or "dall-e-2" + "size": "1024x1024", # "1024x1024", "1024x1792", or "1792x1024" for DALL-E 3 + "quality": "standard", # "standard" or "hd" (DALL-E 3 only) + "n": 1 # Number of images (1 for DALL-E 3, 1-10 for DALL-E 2) + } + ) + + # Generate an image from text + response = generator( + prompt_kwargs={"input_str": "A white siamese cat in a space suit"} + ) + # response.data will contain the image URL + + # Edit an existing image + response = generator( + prompt_kwargs={"input_str": "Add a red hat"}, + model_kwargs={ + "model": "dall-e-2", + "image": "path/to/cat.png", # Original image + "mask": "path/to/mask.png" # Optional mask showing where to edit + } + ) + + # Create variations of an image + response = generator( + prompt_kwargs={"input_str": None}, # Not needed for variations + model_kwargs={ + "model": "dall-e-2", + "image": "path/to/cat.png" # Image to create variations of + } + ) + +The generator supports: + +- Image generation from text descriptions using DALL-E 3 or DALL-E 2 +- Image editing with optional masking (DALL-E 2) +- Creating variations of existing images (DALL-E 2) +- Both local file paths and base64-encoded images +- Various image sizes and quality settings +- Multiple output formats (URL or base64) + +The response will always be wrapped in a ``GeneratorOutput`` object, maintaining consistency with other AdalFlow operations. The generated image(s) will be available in the ``data`` field as either a URL or base64 string. + .. 
admonition:: API reference :class: highlight diff --git a/docs/source/tutorials/model_client.rst b/docs/source/tutorials/model_client.rst index e8226398..4f73e2ee 100644 --- a/docs/source/tutorials/model_client.rst +++ b/docs/source/tutorials/model_client.rst @@ -1513,111 +1513,6 @@ This is the function call that triggers the execution of the custom model client build_custom_model_client() - -OPENAI LLM Chat - Multimodal Example -------------------------------------------------- - -The OpenAI client also supports multimodal inputs. Here's a quick example: - -.. code-block:: python - - from adalflow import Generator, OpenAIClient - - generator = Generator( - model_client=OpenAIClient(), - model_kwargs={ - "model": "gpt-4o", - "max_tokens": 300 - } - ) - - # Single image - response = generator( - prompt_kwargs={ - "input_str": "What's in this image?", - "images": "path/to/image.jpg" # Local file or URL - } - ) - - # Multiple images - response = generator( - prompt_kwargs={ - "input_str": "Compare these images.", - "images": [ - "path/to/first.jpg", - "https://example.com/second.jpg" - ] - } - ) - -The client handles both local files and URLs, with support for PNG, JPEG, WEBP, and non-animated GIF formats. - -OPENAI Image Generation -------------------------------------------------- - -The OpenAI client supports image generation, editing, and variation creation through DALL-E models. First, you need to define a Generator class with the correct model type: - -.. code-block:: python - - from adalflow import Generator - from adalflow.core.types import ModelType - - class ImageGenerator(Generator): - """Generator subclass for image generation.""" - model_type = ModelType.IMAGE_GENERATION - -Then you can use it like this: - -.. 
code-block:: python - - from adalflow import OpenAIClient - - generator = ImageGenerator( - model_client=OpenAIClient(), - model_kwargs={ - "model": "dall-e-3", # or "dall-e-2" - "size": "1024x1024", # "1024x1024", "1024x1792", or "1792x1024" for DALL-E 3 - "quality": "standard", # "standard" or "hd" (DALL-E 3 only) - "n": 1 # Number of images (1 for DALL-E 3, 1-10 for DALL-E 2) - } - ) - - # Generate an image from text - response = generator( - prompt_kwargs={"input_str": "A white siamese cat in a space suit"} - ) - # response.data will contain the image URL - - # Edit an existing image - response = generator( - prompt_kwargs={"input_str": "Add a red hat"}, - model_kwargs={ - "model": "dall-e-2", - "image": "path/to/cat.png", # Original image - "mask": "path/to/mask.png" # Optional mask showing where to edit - } - ) - - # Create variations of an image - response = generator( - prompt_kwargs={"input_str": None}, # Not needed for variations - model_kwargs={ - "model": "dall-e-2", - "image": "path/to/cat.png" # Image to create variations of - } - ) - -The client supports: - -- Image generation from text descriptions using DALL-E 3 or DALL-E 2 -- Image editing with optional masking (DALL-E 2) -- Creating variations of existing images (DALL-E 2) -- Both local file paths and base64-encoded images -- Various image sizes and quality settings -- Multiple output formats (URL or base64) - -The response will always be wrapped in a ``GeneratorOutput`` object, maintaining consistency with other AdalFlow operations. The generated image(s) will be available in the ``data`` field as either a URL or base64 string. - .. 
admonition:: API reference :class: highlight diff --git a/tutorials/multimodal_client_testing_examples.py b/tutorials/multimodal_client_testing_examples.py new file mode 100644 index 00000000..44bdbc69 --- /dev/null +++ b/tutorials/multimodal_client_testing_examples.py @@ -0,0 +1,119 @@ +""" +OpenAI Vision and DALL-E Example with Error Testing + +To test with different API keys: + +1. First run with a valid key: + export OPENAI_API_KEY='your_valid_key_here' + python tutorials/multimodal_client_testing_examples.py + +2. Then test with an invalid key: + export OPENAI_API_KEY='abc123' + python tutorials/multimodal_client_testing_examples.py + +The script will show different GeneratorOutput responses based on the API key status. +""" + +from adalflow.core import Generator +from adalflow.components.model_client.openai_client import OpenAIClient +from adalflow.core.types import ModelType +import asyncio +import numpy as np +from dataclasses import dataclass +from typing import List +from numpy.linalg import norm + +class ImageGenerator(Generator): + """Generator subclass for image generation.""" + model_type = ModelType.IMAGE_GENERATION + +def test_basic_generation(): + """Test basic text generation""" + client = OpenAIClient() + gen = Generator( + model_client=client, + model_kwargs={ + "model": "gpt-4o-mini", + "max_tokens": 100 + } + ) + + print("\n=== Testing Basic Generation ===") + response = gen({"input_str": "Hello, world!"}) + print(f"Response: {response}") + +def test_invalid_image_url(): + """Test Generator output with invalid image URL""" + client = OpenAIClient() + gen = Generator( + model_client=client, + model_kwargs={ + "model": "gpt-4o-mini", + "images": "https://invalid.url/nonexistent.jpg", + "max_tokens": 300 + } + ) + + print("\n=== Testing Invalid Image URL ===") + response = gen({"input_str": "What do you see in this image?"}) + print(f"Response with invalid image URL: {response}") + +def test_invalid_image_generation(): + """Test DALL-E generation with invalid parameters"""
+ client = OpenAIClient() + gen = ImageGenerator( + model_client=client, + model_kwargs={ + "model": "dall-e-3", + "size": "invalid_size", # Invalid size parameter + "quality": "standard", + "n": 1 + } + ) + + print("\n=== Testing Invalid DALL-E Parameters ===") + response = gen({"input_str": "A cat"}) + print(f"Response with invalid DALL-E parameters: {response}") + +def test_vision_and_generation(): + """Test both vision analysis and image generation""" + client = OpenAIClient() + + # 1. Test Vision Analysis + vision_gen = Generator( + model_client=client, + model_kwargs={ + "model": "gpt-4o-mini", + "images": "https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png", + "max_tokens": 300 + } + ) + + vision_response = vision_gen({"input_str": "What do you see in this image? Be detailed but concise."}) + print("\n=== Vision Analysis ===") + print(f"Description: {vision_response.raw_response}") + + # 2. Test DALL-E Image Generation + dalle_gen = ImageGenerator( + model_client=client, + model_kwargs={ + "model": "dall-e-3", + "size": "1024x1024", + "quality": "standard", + "n": 1 + } + ) + + # For image generation, input_str becomes the prompt + response = dalle_gen({"input_str": "A happy siamese cat playing with a red ball of yarn"}) + print("\n=== DALL-E Generation ===") + print(f"Generated Image URL: {response.data}") + +if __name__ == "__main__": + print("Starting OpenAI Vision and DALL-E test...\n") + + # Run all tests - they will show errors if API key is invalid/empty + test_basic_generation() + test_invalid_image_url() + test_invalid_image_generation() + test_vision_and_generation() \ No newline at end of file