Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

I couldn't upload a file and use it in a thread normally via the openai library... #1775

Open
1 task done
alex-deus opened this issue Oct 4, 2024 · 1 comment
Open
1 task done
Labels
bug Something isn't working

Comments

@alex-deus
Copy link

Confirm this is an issue with the Python library and not an underlying OpenAI API

  • This is an issue with the Python library

Describe the bug

I uploaded a file to a vector store, but I couldn't use the file in threads:
It seems there was an error while trying to search the uploaded files. Could you please try uploading the file again, or let me know if there is a specific file you want me to look into?.

To Reproduce

  1. create a vector store
  2. create an assistant
  3. upload a file
  4. wait for the upload to finish
  5. attach the file to the vector store
  6. create a thread
  7. create a run
  8. wait for the run to complete
  9. get the messages
  10. take the run's message

Code snippets

import json
import os
import time

from openai import OpenAI

# Reproduction script: upload a JSON file, index it in a vector store, and ask
# an assistant with the file_search tool to read a value back from it.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

vector_store = client.beta.vector_stores.create(name="Test")

assistant = client.beta.assistants.create(
    description="Test",
    model="gpt-4o",
    tools=[{"type": "file_search"}],
    tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    temperature=0.4,
)

# Upload file
file = client.files.create(
    file=("data.json", json.dumps({"name": "Alexbabaliks"}).encode()),
    purpose="assistants",
)
while True:
    file_status = client.files.retrieve(file_id=file.id)
    if file_status.status == "processed":
        break
    if file_status.status == "error":
        # Without this check a rejected upload would be polled forever.
        raise RuntimeError(f"File {file.id} could not be processed")
    time.sleep(1)

# Attach the file and wait for *this file* to be ingested. Polling the file
# entry (rather than the store) also surfaces a failed or cancelled ingestion.
client.beta.vector_stores.files.create(vector_store_id=vector_store.id, file_id=file.id)
while True:
    store_file = client.beta.vector_stores.files.retrieve(
        vector_store_id=vector_store.id,
        file_id=file.id,
    )
    if store_file.status == "completed":
        break
    if store_file.status in ("failed", "cancelled"):
        error = store_file.last_error
        detail = f"{error.code}: {error.message}" if error is not None else store_file.status
        raise RuntimeError(f"Vector store ingestion of {file.id} did not complete ({detail})")
    time.sleep(1)

thread = client.beta.threads.create(tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}})

# NOTE(review): the thread is created without any user message and the prompt
# is passed only as run-level `instructions` -- confirm this is intended.
run = client.beta.threads.runs.create(
    instructions="What is my name??? Take it from JSON file and return JSON in format {'name': '<name>'}",
    thread_id=thread.id,
    assistant_id=assistant.id,
    model="gpt-4o",
    temperature=0.4,
    tools=[{"type": "file_search"}],
)

# Stop polling on every terminal state, not only "completed"/"failed";
# otherwise a cancelled, expired or incomplete run loops forever.
terminal_states = {"completed", "failed", "cancelled", "expired", "incomplete"}
while True:
    run_status = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
    if run_status.status in terminal_states:
        break

    time.sleep(2)

if run_status.status != "completed":
    # Make the failure visible instead of silently printing an empty answer.
    error = run_status.last_error
    detail = f"{error.code}: {error.message}" if error is not None else "no error details"
    print(f"Run ended with status {run_status.status!r} ({detail})")

# Collect the text produced by this run; messages from other runs are skipped.
answer = ""
messages = client.beta.threads.messages.list(thread_id=thread.id)
for message in messages.data:
    if run.id != message.run_id:
        continue

    for content in message.content:
        if content.type == "text":
            answer = content.text.value
            break

print(answer)

OS

Linux

Python version

3.11.1

Library version

openai v1.51.0

@alex-deus alex-deus added the bug Something isn't working label Oct 4, 2024
@alex-deus
Copy link
Author

I came up with a solution, but it doesn’t work well for large JSON files (around 5MB), and sometimes, even for a small, simple file, it returns: {"n": "I couldn't find your given name in the provided documents."} :-D

import json
import os
import time

from openai import OpenAI

# Sampling temperature passed both to the assistant definition and to each run.
TEMPERATURE = 0.2


def ask(client: OpenAI, assistant_id: str, thread_id: str, file_id: str, instruction: str) -> str:
    """Post a question with an attached file to a thread and return the reply.

    A user message carrying ``instruction`` and the file (attached for the
    ``file_search`` tool) is added to the thread, a run is started, and the
    run is polled until it reaches a terminal state.

    Args:
        client: OpenAI client used for all API calls.
        assistant_id: ID of the assistant that executes the run.
        thread_id: ID of the thread the message is posted to.
        file_id: ID of an uploaded file to attach to the message.
        instruction: Text of the user message.

    Returns:
        The text of a message produced by the run, or ``""`` when the run
        produced no text content.

    Raises:
        TimeoutError: The run was still not finished after the polling budget
            (31 status checks, 4 seconds apart) was used up.
        Exception: The run ended as failed, cancelled, expired or incomplete.
    """
    client.beta.threads.messages.create(
        thread_id=thread_id,
        content=instruction,
        role="user",
        attachments=[{"file_id": file_id, "tools": [{"type": "file_search"}]}],
    )

    run = client.beta.threads.runs.create(
        thread_id=thread_id,
        model="gpt-4o",
        tools=[{"type": "file_search"}],
        assistant_id=assistant_id,
        temperature=TEMPERATURE,
    )

    max_polls = 30
    poll_interval_seconds = 4
    # Every non-successful terminal state must stop the loop; checking only
    # "failed" would keep polling a cancelled/expired run until the budget ends.
    unsuccessful_states = ("failed", "cancelled", "expired", "incomplete")

    polls = 0
    while True:
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
        if run.status == "completed":
            break
        if run.status in unsuccessful_states:
            error = run.last_error
            if error is not None:
                raise Exception(f"{error.code}: {error.message}")
            raise Exception(f"Run {run.id} ended with status {run.status!r}")

        polls += 1
        if polls > max_polls:
            # This is a polling timeout, not an API rate limit.
            raise TimeoutError(
                f"Run {run.id} did not complete after {polls} status checks "
                f"(last status: {run.status!r})"
            )
        time.sleep(poll_interval_seconds)

    # Keep only text produced by this run; the thread may hold earlier turns.
    answer = ""
    messages = client.beta.threads.messages.list(thread_id=thread_id)
    for message in messages.data:
        if run.id != message.run_id:
            continue

        for content in message.content:
            if content.type == "text":
                answer = content.text.value
                break

    return answer


def main() -> None:
    """Create an assistant, upload a small JSON file and ask two questions about it.

    Both questions are sent to the same thread through ``ask`` and each answer
    is printed as soon as it is received.
    """
    api_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

    # Assistant with the file_search tool enabled.
    helper = api_client.beta.assistants.create(
        description="Test",
        instructions="Return all answer in JSON format",
        model="gpt-4o",
        tools=[{"type": "file_search"}],
        temperature=TEMPERATURE
    )

    # In-memory JSON document uploaded as "data.json".
    payload = json.dumps({"given_name": "John", "family_name": "Smit"}).encode()
    uploaded = api_client.files.create(file=("data.json", payload), purpose="assistants")

    conversation = api_client.beta.threads.create()

    questions = (
        # Sometimes answer could be {"n": "I couldn't find your given name in the provided documents."}
        "What is my given_name? Answer format {'n': '<name>'}",
        "What is my last family_name? Answer format {'l': '<last name>'}",
    )
    for question in questions:
        reply = ask(api_client, helper.id, conversation.id, uploaded.id, question)
        print(reply)


# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

1 participant