forked from instructor-ai/instructor

Commit ae91440: Field level streaming (instructor-ai#334)
Co-authored-by: Jason Liu <[email protected]>
1 parent 36871e6
Showing 12 changed files with 628 additions and 2 deletions.

@@ -0,0 +1,83 @@
# Field Level Streaming

Field level streaming provides incremental snapshots of the current state of the response model that are immediately usable. This approach is particularly relevant in contexts like rendering UI components.

Instructor supports this pattern by making use of `Partial[T]`. This lets us dynamically create a new class that treats all of the original model's fields as `Optional`.
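Conceptually, `Partial[MeetingInfo]` behaves much like a hand-written variant of the model in which every field is `Optional` and defaults to `None`. A rough, plain-Pydantic sketch of the idea (the `PartialMeetingInfoSketch` name is ours, and the real `Partial` also handles nested models):

```python
from typing import List, Optional
from pydantic import BaseModel


class User(BaseModel):
    name: str
    email: str
    twitter: str


# Hypothetical hand-rolled equivalent of instructor.Partial[MeetingInfo]:
# every field is Optional, so a half-finished JSON payload still validates.
class PartialMeetingInfoSketch(BaseModel):
    users: Optional[List[User]] = None
    date: Optional[str] = None
    location: Optional[str] = None
    budget: Optional[int] = None
    deadline: Optional[str] = None


# An early, incomplete snapshot from the stream validates just fine
snapshot = PartialMeetingInfoSketch.model_validate({"date": "2024-03-15"})
print(snapshot.location)  # None (not yet streamed)
```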
When you specify a partial response model and set `stream=True`, the response from Instructor becomes a generator. As the generator yields results, you can iterate over these incremental updates. The last value yielded by the generator represents the completed extraction.

!!! warning "Limited Validator Support"

    Fewer validators are supported by `Partial` response models, since partially streamed fields will naturally raise validation errors.

!!! note "Item Level Streaming"

    If you are looking for wider validator support, or to stream out a list of completed objects one by one, take a look at [Multi-task Streaming](./lists.md).

Let's look at an example of streaming an extraction of conference information.
```python
import instructor
from openai import OpenAI
from pydantic import BaseModel
from typing import List

client = instructor.patch(OpenAI())

text_block = """
In our recent online meeting, participants from various backgrounds joined to discuss the upcoming tech conference. The names and contact details of the participants were as follows:
- Name: John Doe, Email: [email protected], Twitter: @TechGuru44
- Name: Jane Smith, Email: [email protected], Twitter: @DigitalDiva88
- Name: Alex Johnson, Email: [email protected], Twitter: @CodeMaster2023
During the meeting, we agreed on several key points. The conference will be held on March 15th, 2024, at the Grand Tech Arena located at 4521 Innovation Drive. Dr. Emily Johnson, a renowned AI researcher, will be our keynote speaker.
The budget for the event is set at $50,000, covering venue costs, speaker fees, and promotional activities. Each participant is expected to contribute an article to the conference blog by February 20th.
A follow-up meeting is scheduled for January 25th at 3 PM GMT to finalize the agenda and confirm the list of speakers.
"""


class User(BaseModel):
    name: str
    email: str
    twitter: str


class MeetingInfo(BaseModel):
    users: List[User]
    date: str
    location: str
    budget: int
    deadline: str


# Dynamically create a version of MeetingInfo with all fields Optional
PartialMeetingInfo = instructor.Partial[MeetingInfo]


extraction_stream = client.chat.completions.create(
    model="gpt-4",
    response_model=PartialMeetingInfo,
    messages=[
        {
            "role": "user",
            "content": f"Get the information about the meeting and the users {text_block}",
        },
    ],
    stream=True,
)  # type: ignore


from rich.console import Console

console = Console()

# Each yielded value is a progressively more complete PartialMeetingInfo
for extraction in extraction_stream:
    obj = extraction.model_dump()
    console.clear()
    console.print(obj)
```

![Partial Streaming Gif](../img/partial.gif)
@@ -0,0 +1,100 @@
```python
# Part of this code is adapted from the following examples from the OpenAI Cookbook:
# https://cookbook.openai.com/examples/how_to_stream_completions
# https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
import time
import tiktoken
import instructor
from openai import OpenAI
from pydantic import BaseModel

client = instructor.patch(OpenAI(), mode=instructor.Mode.MD_JSON)


def num_tokens_from_string(string: str, model_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.encoding_for_model(model_name)

    num_tokens = len(encoding.encode(string))
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>

    return num_tokens


class User(BaseModel):
    name: str
    role: str
    age: int


PartialUser = instructor.Partial[User]


def benchmark_raw_stream(model="gpt-4"):
    content = f"""Respond only in JSON that would validate to this schema and include nothing extra.
Otherwise something bad will happen:\n {User.model_json_schema()}"""

    start_time = time.time()
    extraction_stream = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": content},
            {
                "role": "user",
                "content": "give me a Harry Potter character in json, name, role, age",
            },
        ],
        stream=True,
    )

    # Collect the raw deltas, join them, and validate the final JSON once
    collected_messages = [chunk.choices[0].delta.content for chunk in extraction_stream]
    collected_messages = [m for m in collected_messages if m is not None]
    collected_messages = "".join(collected_messages)
    User.model_validate_json(collected_messages)
    end_time = time.time() - start_time

    output_tokens = num_tokens_from_string(collected_messages, model)
    tokens_per_sec = output_tokens / end_time
    return tokens_per_sec


def benchmark_partial_streaming(model="gpt-4"):
    start_time = time.time()
    extraction_stream = client.chat.completions.create(
        model=model,
        response_model=PartialUser,
        messages=[
            {
                "role": "user",
                "content": "give me a Harry Potter character in json, name, role, age",
            }
        ],
        stream=True,
    )

    # Drain the stream; the last chunk holds the completed extraction
    for chunk in extraction_stream:
        pass
    end_time = time.time() - start_time

    output_tokens = num_tokens_from_string(chunk.model_dump_json(), model)
    tokens_per_sec = output_tokens / end_time
    return tokens_per_sec


if __name__ == "__main__":
    partial_times = [
        benchmark_partial_streaming(model="gpt-3.5-turbo-1106") for _ in range(10)
    ]
    avg_partial_time = sum(partial_times) / len(partial_times)

    raw_times = [benchmark_raw_stream(model="gpt-3.5-turbo") for _ in range(10)]
    avg_raw_time = sum(raw_times) / len(raw_times)
    print(f"Raw streaming: {avg_raw_time:.2f} tokens/sec")

    print(f"Partial streaming: {avg_partial_time:.2f} tokens/sec")
    print(f"Relative speedup: {avg_partial_time / avg_raw_time:.2f}x")

"""
Raw streaming: 22.36 tokens/sec
Partial streaming: 15.46 tokens/sec
Relative speedup: 0.69x
"""
```
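Both benchmarks reduce to the same throughput formula: output tokens divided by elapsed wall-clock time. A tiny self-contained check of that arithmetic (the numbers below are illustrative, not measured):

```python
def throughput(num_tokens: int, elapsed_sec: float) -> float:
    # tokens per second, as computed at the end of both benchmark functions
    return num_tokens / elapsed_sec


raw = throughput(112, 5.0)      # 22.4 tokens/sec
partial = throughput(77, 5.0)   # 15.4 tokens/sec
print(f"Relative speedup: {partial / raw:.2f}x")  # 0.69x
```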
@@ -0,0 +1,60 @@
```python
import instructor
from openai import OpenAI
from pydantic import BaseModel
from typing import List

client = instructor.patch(OpenAI())

text_block = """
In our recent online meeting, participants from various backgrounds joined to discuss the upcoming tech conference. The names and contact details of the participants were as follows:
- Name: John Doe, Email: [email protected], Twitter: @TechGuru44
- Name: Jane Smith, Email: [email protected], Twitter: @DigitalDiva88
- Name: Alex Johnson, Email: [email protected], Twitter: @CodeMaster2023
During the meeting, we agreed on several key points. The conference will be held on March 15th, 2024, at the Grand Tech Arena located at 4521 Innovation Drive. Dr. Emily Johnson, a renowned AI researcher, will be our keynote speaker.
The budget for the event is set at $50,000, covering venue costs, speaker fees, and promotional activities. Each participant is expected to contribute an article to the conference blog by February 20th.
A follow-up meeting is scheduled for January 25th at 3 PM GMT to finalize the agenda and confirm the list of speakers.
"""


class User(BaseModel):
    name: str
    email: str
    twitter: str


class MeetingInfo(BaseModel):
    users: List[User]
    date: str
    location: str
    budget: int
    deadline: str


PartialMeetingInfo = instructor.Partial[MeetingInfo]


extraction_stream = client.chat.completions.create(
    model="gpt-4",
    response_model=PartialMeetingInfo,
    messages=[
        {
            "role": "user",
            "content": f"Get the information about the meeting and the users {text_block}",
        },
    ],
    stream=True,
)  # type: ignore


from rich.console import Console

console = Console()

for extraction in extraction_stream:
    obj = extraction.model_dump()
    console.clear()
    console.print(obj)
```
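The render loop above does not depend on OpenAI specifics: any generator of progressively richer snapshots can drive it. A minimal offline sketch of the consumption pattern (the `fake_stream` generator is a hypothetical stand-in for `extraction_stream`):

```python
from typing import Dict, Iterator


def fake_stream() -> Iterator[Dict]:
    # Stand-in for extraction_stream: each snapshot is a superset of the last.
    yield {"date": "2024-03-15"}
    yield {"date": "2024-03-15", "location": "Grand Tech Arena"}
    yield {"date": "2024-03-15", "location": "Grand Tech Arena", "budget": 50000}


final = None
for snapshot in fake_stream():
    final = snapshot  # in the docs example: console.clear(); console.print(obj)

# The last value yielded is the completed extraction
print(final["budget"])  # 50000
```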
@@ -1,12 +1,14 @@
```python
from .multitask import MultiTask
from .maybe import Maybe
from .partial import Partial
from .validators import llm_validator, openai_moderation
from .citation import CitationMixin

__all__ = [  # noqa: F405
    "CitationMixin",
    "MultiTask",
    "Maybe",
    "Partial",
    "llm_validator",
    "openai_moderation",
]
```