feat: implement Pyright for Type-Checking (instructor-ai#630)

Co-authored-by: Jason Liu <[email protected]>
chiradeepv · Apr 29, 2024 · fadeb47
1 parent 5b453ee
commit fadeb47
Show file tree

Hide file tree

Showing 82 changed files with 752 additions and 712 deletions.
diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml
diff --git a/.github/workflows/pyright.yml b/.github/workflows/pyright.yml
@@ -0,0 +1,52 @@
+name: Pyright
+
+on:
+  push:
+  pull_request:
+    branches: [ main ]
+
+env:
+  WORKING_DIRECTORY: "."
+  PYRIGHT_OUTPUT_FILENAME: "pyright.log"
+
+jobs:
+  Pyright:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+        python-version: ["3.9", "3.10", "3.11"]
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Cache Poetry virtualenv
+        uses: actions/cache@v2
+        with:
+          path: ~/.cache/pypoetry/virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1.3.1
+
+      - name: Install dependencies
+        run: poetry install --with dev,anthropic
+
+      - name: Run Static Type Checking with Pyright
+        run: |
+          set -e -o pipefail
+          poetry run pyright > ${{ env.WORKING_DIRECTORY }}/${{ env.PYRIGHT_OUTPUT_FILENAME }}
+
+      - name: Upload Artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: pyright-log
+          path: ${{ env.WORKING_DIRECTORY }}/${{ env.PYRIGHT_OUTPUT_FILENAME }}
diff --git a/.mypy.ini b/.mypy.ini
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -8,16 +8,7 @@ repos:
         files: ^(instructor|tests|examples)/
       - id: ruff-format       # Run the formatter.
         name: Run Formatter (Ruff)
-  - repo: local
+  - repo: https://github.com/RobertCraigie/pyright-python
+    rev: v1.1.360
     hooks:
-      - id: ci_type_mypy
-        name: Run Type Check (Mypy)
-        entry: >
-            bash -c 'set -o pipefail;
-            export CUSTOM_PACKAGES="instructor/_types/_alias.py instructor/cli/cli.py instructor/cli/files.py instructor/cli/usage.py instructor/exceptions.py" &&
-            export CUSTOM_FLAGS="--python-version=3.9 --color-output --no-pretty --follow-imports=skip" &&
-            curl -sSL https://raw.githubusercontent.com/gao-hongnan/omniverse/2fd5de1b8103e955cd5f022ab016b72fa901fa8f/scripts/devops/continuous-integration/type_mypy.sh |
-            bash'
-        language: system
-        types: [python]
-        pass_filenames: false
+    - id: pyright
diff --git a/.ruff.toml b/.ruff.toml
@@ -39,6 +39,8 @@ select = [
   "E722",
   # unused arguments
   "ARG",
+  # Enforce modern type-syntax
+  "UP006",
 ]
 ignore = [
   # mutable defaults

diff --git a/examples/anthropic/run.py b/examples/anthropic/run.py
@@ -1,5 +1,4 @@
 from pydantic import BaseModel
-from typing import List
 import anthropic
 import instructor
 
@@ -15,7 +14,7 @@ class Properties(BaseModel):
 class User(BaseModel):
     name: str
     age: int
-    properties: List[Properties]
+    properties: list[Properties]
 
 
 user = client.messages.create(

diff --git a/examples/auto-ticketer/run.py b/examples/auto-ticketer/run.py
@@ -1,7 +1,7 @@
 import instructor
 from openai import OpenAI
 
-from typing import List, Optional
+from typing import Optional
 from pydantic import BaseModel, Field
 from enum import Enum
 
@@ -32,11 +32,11 @@ class Ticket(BaseModel):
     name: str = Field(..., description="Title of the task")
     description: str = Field(..., description="Detailed description of the task")
     priority: PriorityEnum = Field(..., description="Priority level")
-    assignees: List[str] = Field(..., description="List of users assigned to the task")
-    subtasks: Optional[List[Subtask]] = Field(
+    assignees: list[str] = Field(..., description="List of users assigned to the task")
+    subtasks: Optional[list[Subtask]] = Field(
         None, description="List of subtasks associated with the main task"
     )
-    dependencies: Optional[List[int]] = Field(
+    dependencies: Optional[list[int]] = Field(
         None, description="List of ticket IDs that this ticket depends on"
     )
 
@@ -46,7 +46,7 @@ class ActionItems(BaseModel):
     Correctly resolved set of action items from the given transcript
     """
 
-    items: List[Ticket]
+    items: list[Ticket]
 
 
 def generate(data: str):

diff --git a/examples/avail/run.py b/examples/avail/run.py
@@ -1,5 +1,5 @@
 from pydantic import BaseModel, Field
-from typing import Iterable, List, Literal
+from typing import Iterable, Literal
 from datetime import datetime, timedelta
 
 from openai import OpenAI
@@ -17,7 +17,7 @@ class DateRange(BaseModel):
         default=None,
         description="If the date range repeats, and how often, this way we can generalize the date range to the future., if its special, then we can assume it is a one time event.",
     )
-    days_of_week: List[
+    days_of_week: list[
         Literal[
             "monday",
             "tuesday",
@@ -41,7 +41,7 @@ class DateRange(BaseModel):
 
 
 class AvailabilityResponse(BaseModel):
-    availability: List[DateRange]
+    availability: list[DateRange]
 
 
 def prepare_dates(n=7) -> str:

diff --git a/examples/avail/run_mixtral.py b/examples/avail/run_mixtral.py
@@ -1,6 +1,6 @@
 import os
 from pydantic import BaseModel, Field
-from typing import List, Literal
+from typing import Literal
 from datetime import datetime, timedelta
 
 from openai import OpenAI
@@ -25,7 +25,7 @@ class DateRange(BaseModel):
         default=None,
         description="If the date range repeats, and how often, this way we can generalize the date range to the future., if its special, then we can assume it is a one time event.",
     )
-    days_of_week: List[
+    days_of_week: list[
         Literal[
             "monday",
             "tuesday",
@@ -49,7 +49,7 @@ class DateRange(BaseModel):
 
 
 class AvailabilityResponse(BaseModel):
-    availability: List[DateRange]
+    availability: list[DateRange]
 
 
 def prepare_dates(n=7) -> str:

diff --git a/examples/batch-classification/run-cache.py b/examples/batch-classification/run-cache.py
@@ -3,7 +3,6 @@
 
 from openai import AsyncOpenAI
 from pydantic import BaseModel, Field, field_validator
-from typing import List
 from enum import Enum
 
 client = instructor.from_openai(AsyncOpenAI(), mode=instructor.Mode.TOOLS)
@@ -40,7 +39,7 @@ class QuestionClassification(BaseModel):
     chain_of_thought: str = Field(
         ..., description="The chain of thought that led to the classification"
     )
-    classification: List[QuestionType] = Field(
+    classification: list[QuestionType] = Field(
         description=f"An accuracy and correct prediction predicted class of question. Only allowed types: {[t.value for t in QuestionType]}, should be used",
     )
 
@@ -68,7 +67,7 @@ async def classify(data: str):
         )
 
 
-async def main(questions: List[str]):
+async def main(questions: list[str]):
     tasks = [classify(question) for question in questions]
     resps = []
     for task in asyncio.as_completed(tasks):

diff --git a/examples/batch-classification/run.py b/examples/batch-classification/run.py
@@ -4,7 +4,6 @@
 
 from openai import AsyncOpenAI
 from pydantic import BaseModel, Field, field_validator
-from typing import List
 from enum import Enum
 
 client = AsyncOpenAI()
@@ -42,7 +41,7 @@ class QuestionClassification(BaseModel):
     chain_of_thought: str = Field(
         ..., description="The chain of thought that led to the classification"
     )
-    classification: List[QuestionType] = Field(
+    classification: list[QuestionType] = Field(
         description=f"An accuracy and correct prediction predicted class of question. Only allowed types: {[t.value for t in QuestionType]}, should be used",
     )
 
@@ -69,7 +68,7 @@ async def classify(data: str):
         )
 
 
-async def main(questions: List[str], *, path_to_jsonl: str = None):
+async def main(questions: list[str], *, path_to_jsonl: str = None):
     tasks = [classify(question) for question in questions]
     for task in asyncio.as_completed(tasks):
         question, label = await task

diff --git a/examples/batch-classification/run_langsmith.py b/examples/batch-classification/run_langsmith.py
@@ -6,7 +6,6 @@
 
 from openai import AsyncOpenAI
 from pydantic import BaseModel, Field, field_validator
-from typing import List
 from enum import Enum
 
 client = wrap_openai(AsyncOpenAI())
@@ -44,7 +43,7 @@ class QuestionClassification(BaseModel):
     chain_of_thought: str = Field(
         ..., description="The chain of thought that led to the classification"
     )
-    classification: List[QuestionType] = Field(
+    classification: list[QuestionType] = Field(
         description=f"An accuracy and correct prediction predicted class of question. Only allowed types: {[t.value for t in QuestionType]}, should be used",
     )
 
@@ -73,7 +72,7 @@ async def classify(data: str):
         )
 
 
-async def main(questions: List[str]):
+async def main(questions: list[str]):
     tasks = [classify(question) for question in questions]
     resps = []
     for task in asyncio.as_completed(tasks):

diff --git a/examples/chain-of-density/chain_of_density.py b/examples/chain-of-density/chain_of_density.py
@@ -1,5 +1,4 @@
 from pydantic import BaseModel, Field, field_validator
-from typing import List
 import instructor
 import nltk
 from openai import OpenAI
@@ -38,12 +37,12 @@ class RewrittenSummary(BaseModel):
         ...,
         description="This is a new, denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities. It should have the same length ( ~ 80 words ) as the previous summary and should be easily understood without the Article",
     )
-    absent: List[str] = Field(
+    absent: list[str] = Field(
         ...,
         default_factory=list,
         description="this is a list of Entities found absent from the new summary that were present in the previous summary",
     )
-    missing: List[str] = Field(
+    missing: list[str] = Field(
         default_factory=list,
         description="This is a list of 1-3 informative Entities from the Article that are missing from the new summary which should be included in the next generated summary.",
     )
@@ -77,15 +76,15 @@ def min_length(cls, v: str):
         return v
 
     @field_validator("missing")
-    def has_missing_entities(cls, missing_entities: List[str]):
+    def has_missing_entities(cls, missing_entities: list[str]):
         if len(missing_entities) == 0:
             raise ValueError(
                 "You must identify 1-3 informative Entities from the Article which are missing from the previously generated summary to be used in a new summary"
             )
         return missing_entities
 
     @field_validator("absent")
-    def has_no_absent_entities(cls, absent_entities: List[str]):
+    def has_no_absent_entities(cls, absent_entities: list[str]):
         absent_entity_string = ",".join(absent_entities)
         if len(absent_entities) > 0:
             print(f"Detected absent entities of {absent_entity_string}")