diff --git a/.gitignore b/.gitignore index 89929d7e..e3818273 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,6 @@ appsettings.json __pycache__ -.cache \ No newline at end of file +.cache + +*.idea/ \ No newline at end of file diff --git a/gen-ai/Assistants/law_firm_assistant/.gitignore b/gen-ai/Assistants/law_firm_assistant/.gitignore new file mode 100644 index 00000000..fdc42473 --- /dev/null +++ b/gen-ai/Assistants/law_firm_assistant/.gitignore @@ -0,0 +1,3 @@ +.idea +*.log +*.env diff --git a/gen-ai/Assistants/law_firm_assistant/README.md b/gen-ai/Assistants/law_firm_assistant/README.md new file mode 100644 index 00000000..9682a116 --- /dev/null +++ b/gen-ai/Assistants/law_firm_assistant/README.md @@ -0,0 +1,206 @@ +# Law Firm Assistant + +## Overview +This sample shows how to use the new Web Browse tool with Azure OpenAI Assistants. +This tool is based on the Bing Search API and makes it easy to ground answers in public web data. + + +Because LLMs only have data available up to a cutoff date, they might not be able to handle questions that require up-to-date information. +And this is where the Web Browse tool comes in handy! + + +## Objective +The objective of this sample is to create an OpenAI assistant for law firms that helps answer questions about court cases. +Besides using the browser tool, the assistant will also use the [file search tool](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/file-search?tabs=python) to search for information in the law firm's index. + +The assistant is implemented as a Python CLI (command line interface) which the user can use to interact with the assistant. 
+ +By the end of this tutorial, you should be able to: +- Create an OpenAI assistant that uses the Web Browse and the File Search tools + +## Programming Languages +- Python + +## Estimated Runtime: 10 mins + +## Pre-requisites +- An [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service) resource (API key + endpoint) +- A [Bing Search](https://www.microsoft.com/en-us/bing/apis/bing-custom-search-api?msockid=05017416a2426182001960bfa3e36056) resource + - The Bing resource should be a Bing Search v7 resource and the SKU should be S15 or S16 + - The Azure OpenAI resource needs to have 'Contributor' role on the selected Bing resource to use it +- Python 3.10 or later + +## Running the sample + +### Step 1: Fill in the environment variables +Create a `.env` file with the following environment variables: +```commandline +OPENAI_KEY="" +OPENAI_ENDPOINT="" +BING_RESOURCE_ID="" +``` + +Note: The first time you run the sample, an assistant and a vector store will be created. Their ids will be stored in a new `assistant.env` file, which is used to load the assistant and the vector store in the following runs. + + +### Step 2: Install requirements +```commandline +pip install -r requirements.txt +``` + +### Step 3: Run the sample +```commandline +python app.py +``` + +In order to exit the application you can type `exit`. +```commandline +Your input: exit +``` + +## Example +```commandline +I'm a law firm assistant. +How can I help you with court cases! + +Your input: >? do we have any internal case about waste disposal? +Assistant: Yes, we have an internal case related to waste disposal: +**Case Title:** Contoso Company v. United States Environmental Protection Agency (EPA) +**Docket Number:** 24-1234 +**Facts of the Case:** Contoso Company, a major manufacturer of electronic components, was fined by the EPA for allegedly violating environmental regulations related to the disposal of hazardous waste. 
Contoso argued that the regulations were overly vague and that they had taken all reasonable steps to comply with environmental laws. +**Legal Question:** Does the EPA’s regulation on hazardous waste disposal violate the Due Process Clause of the Fifth Amendment by being unconstitutionally vague? +**Arguments:** +- **Contoso Company:** They contended that the EPA’s regulations were so vague that they did not provide a clear standard for compliance, thus violating the company’s right to due process. They argued that the fines imposed were unjust and that the regulations needed to be more specific to be enforceable. +- **EPA:** The EPA argued that the regulations were clear and necessary to protect the environment and public health. They maintained that Contoso failed to meet the established standards and that the fines were justified. +**Decision:** The Supreme Court ruled in favor of the EPA, holding that the regulations were sufficiently clear and that Contoso had not demonstrated that they were unconstitutionally vague. The Court emphasized the importance of environmental protection and the need for companies to adhere to established standards. +**Opinion:** Justice Smith, writing for the majority, stated that while regulations must be clear, they do not need to be perfectly precise. The Court found that the EPA’s guidelines provided adequate notice of what was required and that Contoso had ample opportunity to comply【4:0†source】. + +Your input: >? list me 3 similar external cases from the last month +Assistant: Here are three recent external cases related to waste disposal from July 2024: +1. **Hawkins v. Hopkinsville Solid Waste Enterprise et al** + **Court:** US District Court for the Western District of Kentucky + **Filed:** July 28, 2024 + **Details:** This case involves a dispute over waste disposal practices by the Hopkinsville Solid Waste Enterprise【8:0†source】. +2. **Agarin v. 
Waste Management Solution LLC a/k/a WM Solutions d/b/a WM et al** + **Court:** New York Eastern District Court + **Filed:** July 31, 2024 + **Details:** This case includes allegations of improper handling and disposal of personal injury-related waste by WM Solutions【8:1†source】. +3. **Zurich American Insurance Company v. Discount Waste, Inc. et al** + **Filed:** July 26, 2024 + **Details:** The insurance company filed a complaint against Discount Waste for damages related to improper waste disposal practices【8:3†source】. +Url citations: +* 【8:0†source】 - [Hawkins v. Hopkinsville Solid Waste Enterprise et al](https://dockets.justia.com/docket/kentucky/kywdce/5:2024cv00130/135679) +* 【8:0†source】 - [Calendars - District Court - King County, Washington](https://kingcounty.gov/en/court/district-court/courts-jails-legal-system/court-calendars-locations-operations/calendars) +* 【8:1†source】 - [Agarin v. Waste Management Solution LLC a/k/a WM Solutions d/b/a WM et ...](https://www.pacermonitor.com/public/case/54510346/Agarin_v_Waste_Management_Solution_LLC_aka_WM_Solutions_dba_WM_et_al) +* 【8:1†source】 - [Calendars - District Court - King County, Washington](https://kingcounty.gov/en/court/district-court/courts-jails-legal-system/court-calendars-locations-operations/calendars) +* 【8:3†source】 - [Zurich American Insurance Company v. Discount Waste, Inc. et al](https://dockets.justia.com/docket/florida/flmdce/3:2024cv00753/430469) +* 【8:3†source】 - [Calendars - District Court - King County, Washington](https://kingcounty.gov/en/court/district-court/courts-jails-legal-system/court-calendars-locations-operations/calendars) + +Your input: >? compare the contoso case against the external ones in a table format +Assistant: Here is a comparison of the Contoso case against the three recent external cases in a table format: +| **Aspect** | **Contoso Company v. EPA** | **Hawkins v. Hopkinsville Solid Waste Enterprise et al** | **Agarin v. 
Waste Management Solution LLC et al** | **Zurich American Insurance Company v. Discount Waste Inc. et al** | +|------------|----------------------------|--------------------------------------------------------|--------------------------------------------------|---------------------------------------------------------------| +| **Court** | U.S. Supreme Court | U.S. District Court for the Western District of Kentucky | New York Eastern District Court | (Court not specified in available details) | +| **Date Filed** | (Filed on unspecified date) | July 28, 2024 | July 31, 2024 | July 26, 2024 | +| **Parties Involved** | Contoso Company, EPA | Hawkins, Hopkinsville Solid Waste Enterprise | Agarin, Waste Management Solution LLC (WM Solutions), WM | Zurich American Insurance Company, Discount Waste Inc. | +| **Legal Question** | Constitutionality of EPA's hazardous waste disposal regulations | Dispute over waste disposal practices | Improper handling and disposal of personal injury-related waste | Damages related to improper waste disposal practices | +| **Main Argument (Plaintiff)** | EPA's regulations were vague, violating due process | Waste disposal practices were improper or harmful | Mismanagement of waste, leading to personal injury | Improper waste disposal causing damages | +| **Main Argument (Defendant)** | Regulations were clear and necessary for environmental protection | (Not specified in available details) | (Not specified in available details) | (Not specified in available details) | +| **Decision** | EPA's regulations upheld; found sufficiently clear and enforceable | (Decision pending) | (Decision pending) | (Decision pending) | +This table highlights the main aspects and differences between the Contoso case and the three recent external cases related to waste disposal. 
+``` + +``` +[2024-08-01 14:00:43 - assistant:36 - INFO] Vector store with id already exists +[2024-08-01 14:00:44 - assistant:68 - INFO] Assistant with id already exists +[2024-08-01 14:00:44 - cli:19 - INFO] starting conversation with assistant (assistant_id=, thread_id=) +[2024-08-01 14:00:55 - event_handler:21 - INFO] started calling tool file_search +[2024-08-01 14:00:58 - event_handler:24 - INFO] completed calling tool file_search +[2024-08-01 14:01:04 - event_handler:24 - INFO] completed calling tool file_search +[2024-08-01 14:01:04 - event_handler:24 - INFO] completed calling tool file_search +[2024-08-01 14:01:04 - event_handler:44 - INFO] completed conversation with assistant +[2024-08-01 14:01:20 - event_handler:21 - INFO] started calling tool browser +[2024-08-01 14:01:21 - event_handler:24 - INFO] completed calling tool browser +[2024-08-01 14:01:24 - event_handler:24 - INFO] completed calling tool browser +[2024-08-01 14:01:24 - event_handler:24 - INFO] completed calling tool browser +[2024-08-01 14:01:24 - event_handler:44 - INFO] completed conversation with assistant +[2024-08-01 14:01:58 - event_handler:44 - INFO] completed conversation with assistant + +``` + +## Understanding the Solution + +### OpenAI Client + +- Create an OpenAI client with: + - at least `2024-07-01-preview` version + - passing the header "X-Ms-Enable-Preview": "true" +```python + client = AzureOpenAI( + api_key=openai_key, + api_version="2024-07-01-preview", + azure_endpoint=openai_endpoint, + default_headers={"X-Ms-Enable-Preview": "true"} + ) +``` + +### Vector Store + +- Create a vector store +```python +# assistant.py +vector_store = client.beta.vector_stores.create(name="courtcases") +``` + +- Upload files to vector store +```python +# assistant.py +client.beta.vector_stores.file_batches.upload_and_poll( + vector_store_id=vector_store_id, files=file_streams +) +``` + +### Assistant +- Create the assistant with file search and browser tools + - browser tool needs the bing 
resource id + - file search tool needs the vector store id +```python +# assistant.py +assistant = client.beta.assistants.create( + name="Law firm copilot", + instructions=''' +You are a law firm assistant that answers questions about court cases. + +You are only allowed to: +- use the file search tool to search for internal court cases +- use the browser tool to look for court cases on the web + +You are not allowed to answer questions that are not related to court cases + ''', + tools=[{ + "type": "file_search" + }, { + "type": "browser", + "browser": { + "bing_resource_id": bing_resource_id + } + }], + tool_resources={ + "file_search": { + "vector_store_ids": [vector_store_id] + } + }, + model="gpt-4o-0513", +) +``` + +## FAQ + +### How can I validate whether a tool was called? +You can validate the browser tool was called by checking the logs (`app.log` file). You should see a log similar to the following: +``` +[2024-08-01 14:01:21 - event_handler:24 - INFO] completed calling tool browser +``` + +If you want, you can also debug the `event_handler.py` file. +When the tool call is completed, the `on_tool_call_done` method is called. You can add a breakpoint there to check the response. 
\ No newline at end of file
diff --git a/gen-ai/Assistants/law_firm_assistant/app.py b/gen-ai/Assistants/law_firm_assistant/app.py
new file mode 100644
index 00000000..585fdbeb
--- /dev/null
+++ b/gen-ai/Assistants/law_firm_assistant/app.py
@@ -0,0 +1,58 @@
+"""Entry point of the law firm assistant sample.
+
+Loads settings from the `.env` file, configures file logging, makes sure the
+assistant and its vector store exist, and starts the interactive CLI.
+"""
+
+import os
+import logging
+
+from openai import AzureOpenAI
+from dotenv import load_dotenv
+
+from cli import Cli
+from assistant import setup_assistant
+
+# makes the values of the local `.env` file visible to os.getenv()
+load_dotenv()
+# logs go to `app.log` so they do not interleave with the conversation on stdout
+logging.basicConfig(
+    filename='app.log',
+    format="[%(asctime)s - %(name)s:%(lineno)d - %(levelname)s] %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    level=logging.INFO
+)
+
+logger = logging.getLogger(__name__)
+
+
+if __name__ == "__main__":
+    bing_resource_id = os.getenv("BING_RESOURCE_ID")
+    openai_key = os.getenv("OPENAI_KEY")
+    openai_endpoint = os.getenv("OPENAI_ENDPOINT")
+
+    # Validate environment variables. A variable that is present but empty is
+    # rejected as well, and each message names the variable that is actually
+    # read above so the user knows what to add to `.env`.
+    if not bing_resource_id:
+        raise ValueError("BING_RESOURCE_ID is not set")
+    if not openai_key:
+        raise ValueError("OPENAI_KEY is not set")
+    if not openai_endpoint:
+        raise ValueError("OPENAI_ENDPOINT is not set")
+
+    # The browser tool is a preview feature: the sample's README asks for at
+    # least this API version together with the preview header.
+    client = AzureOpenAI(
+        api_key=openai_key,
+        api_version="2024-07-01-preview",
+        azure_endpoint=openai_endpoint,
+        default_headers={"X-Ms-Enable-Preview": "true"}
+    )
+
+    # returns the id of the assistant the CLI should talk to
+    assistant_id = setup_assistant(client=client, bing_resource_id=bing_resource_id)
+
+    runner = Cli(client, assistant_id)
+
+    runner.run()
diff --git a/gen-ai/Assistants/law_firm_assistant/assets/contoso_case.txt b/gen-ai/Assistants/law_firm_assistant/assets/contoso_case.txt
new file mode 100644
index 00000000..ae98f3b2
--- /dev/null
+++ b/gen-ai/Assistants/law_firm_assistant/assets/contoso_case.txt
@@ -0,0 +1,15 @@
+Case Title: Contoso Company v. United States Environmental Protection Agency (EPA)
+
+Docket Number: 24-1234
+
+Facts of the Case: Contoso Company, a major manufacturer of electronic components, was fined by the EPA for allegedly violating environmental regulations related to the disposal of hazardous waste. Contoso argued that the regulations were overly vague and that they had taken all reasonable steps to comply with environmental laws.
+
+Legal Question: Does the EPA’s regulation on hazardous waste disposal violate the Due Process Clause of the Fifth Amendment by being unconstitutionally vague?
+
+Arguments:
+
+Contoso Company: Contoso contends that the EPA’s regulations are so vague that they do not provide a clear standard for compliance, thus violating the company’s right to due process. They argue that the fines imposed are unjust and that the regulations need to be more specific to be enforceable.
+EPA: The EPA argues that the regulations are clear and necessary to protect the environment and public health. They maintain that Contoso failed to meet the established standards and that the fines are justified.
+Decision: The Supreme Court ruled in favor of the EPA, holding that the regulations were sufficiently clear and that Contoso had not demonstrated that they were unconstitutionally vague. The Court emphasized the importance of environmental protection and the need for companies to adhere to established standards.
+
+Opinion: Justice Smith, writing for the majority, stated that while regulations must be clear, they do not need to be perfectly precise. The Court found that the EPA’s guidelines provided adequate notice of what was required and that Contoso had ample opportunity to comply.
diff --git a/gen-ai/Assistants/law_firm_assistant/assistant.py b/gen-ai/Assistants/law_firm_assistant/assistant.py
new file mode 100644
index 00000000..0c8b73e0
--- /dev/null
+++ b/gen-ai/Assistants/law_firm_assistant/assistant.py
@@ -0,0 +1,146 @@
+"""Creates (or reuses) the vector store and the assistant used by the sample.
+
+The ids of created resources are appended to `assistant.env` so that later runs
+can load them from the environment instead of creating new resources.
+"""
+
+import os
+import logging
+from contextlib import ExitStack
+from typing import TextIO
+
+from dotenv import load_dotenv
+from openai import AzureOpenAI
+
+assistant_id_env_name = "ASSISTANT_ID"
+vector_store_id_env_name = "VECTOR_STORE_ID"
+assistant_env_filename = "assistant.env"
+
+# makes ids persisted by a previous run visible to os.getenv()
+load_dotenv(dotenv_path=assistant_env_filename)
+file_paths = [
+    "./assets/contoso_case.txt"
+]
+
+
+logger = logging.getLogger(__name__)
+
+
+def setup_assistant(client: AzureOpenAI, bing_resource_id: str) -> str:
+    """Make sure the vector store and the assistant exist.
+
+    :param client: Azure OpenAI client used for the API calls.
+    :param bing_resource_id: id of the Bing resource passed to the browser tool.
+    :return: id of the assistant to talk to.
+    :raises Exception: if a persisted id can no longer be retrieved.
+    """
+    # append mode keeps the ids written by earlier runs
+    with open(assistant_env_filename, "a") as env_file:
+        vector_store_id = get_or_create_vector_store(client, env_file)
+        assistant_id = get_or_create_assistant(client, bing_resource_id, vector_store_id, env_file)
+
+    return assistant_id
+
+
+def get_or_create_vector_store(client: AzureOpenAI, env_file: TextIO) -> str:
+    """Return the id of the vector store, creating and filling it when needed.
+
+    :param client: Azure OpenAI client used for the API calls.
+    :param env_file: writable handle of `assistant.env`; receives a newly created id.
+    :return: id of the existing or newly created vector store.
+    :raises Exception: if the persisted vector store id cannot be retrieved.
+    """
+    vector_store_id = os.getenv(vector_store_id_env_name)
+
+    if vector_store_id is not None:
+        try:
+            # validates vector store exists
+            client.beta.vector_stores.retrieve(vector_store_id=vector_store_id)
+        except Exception as ex:
+            # chain the original error so its traceback is not lost
+            raise Exception(f"Error retrieving vector store with id {vector_store_id}: {ex}") from ex
+        logger.info("Vector store with id {} already exists".format(vector_store_id))
+        return vector_store_id
+
+    vector_store = client.beta.vector_stores.create(name="courtcases")
+    vector_store_id = vector_store.id
+    logger.info("Created new vector store with id {}".format(vector_store_id))
+
+    # stores the id in the assistant.env file
+    write_env(env_file, vector_store_id_env_name, vector_store_id)
+
+    # Ready the files for upload to OpenAI. The ExitStack closes every opened file when the
+    # block is left, whether the upload succeeded or raised.
+    with ExitStack() as stack:
+        file_streams = [stack.enter_context(open(path, "rb")) for path in file_paths]
+
+        # Use the upload and poll SDK helper to upload the files, add them to the vector store,
+        # and poll the status of the file batch for completion.
+        client.beta.vector_stores.file_batches.upload_and_poll(
+            vector_store_id=vector_store_id, files=file_streams
+        )
+
+    logger.info("Uploaded files to vector store: [{}]".format(", ".join(file_paths)))
+    return vector_store_id
+
+
+def get_or_create_assistant(client: AzureOpenAI, bing_resource_id: str, vector_store_id: str, env_file: TextIO) -> str:
+    """Return the id of the assistant, creating it when needed.
+
+    :param client: Azure OpenAI client used for the API calls.
+    :param bing_resource_id: id of the Bing resource passed to the browser tool.
+    :param vector_store_id: id of the vector store attached to the file search tool.
+    :param env_file: writable handle of `assistant.env`; receives a newly created id.
+    :return: id of the existing or newly created assistant.
+    :raises Exception: if the persisted assistant id cannot be retrieved.
+    """
+    assistant_id = os.getenv(assistant_id_env_name)
+
+    if assistant_id is not None:
+        try:
+            # validates assistant exists
+            client.beta.assistants.retrieve(assistant_id=assistant_id)
+        except Exception as ex:
+            raise Exception(f"Error retrieving assistant with id {assistant_id}: {ex}") from ex
+        logger.info("Assistant with id {} already exists".format(assistant_id))
+        return assistant_id
+
+    assistant = client.beta.assistants.create(
+        name="Law firm copilot",
+        instructions='''
+You are a law firm assistant that answers questions about court cases.
+
+You are only allowed to:
+- use the file search tool to search for internal court cases
+- use the browser tool to look for court cases on the web
+
+You are not allowed to answer questions that are not related to court cases
+    ''',
+        tools=[{
+            "type": "file_search"
+        }, {
+            "type": "browser",
+            "browser": {
+                "bing_resource_id": bing_resource_id
+            }
+        }],
+        tool_resources={
+            "file_search": {
+                "vector_store_ids": [vector_store_id]
+            }
+        },
+        model="gpt-4o-0513",
+    )
+    assistant_id = assistant.id
+
+    logger.info("Created new assistant with id {}".format(assistant_id))
+
+    # stores the id in the assistant.env file
+    write_env(env_file, assistant_id_env_name, assistant_id)
+
+    return assistant_id
+
+
+def write_env(env_file: TextIO, key: str, value: str) -> None:
+    """Append one `key="value"` line to the given env file."""
+    env_file.write("{}=\"{}\"\n".format(key, value))
diff --git a/gen-ai/Assistants/law_firm_assistant/cli.py b/gen-ai/Assistants/law_firm_assistant/cli.py
new file mode 100644
index 00000000..e8c9669c
--- /dev/null
+++ b/gen-ai/Assistants/law_firm_assistant/cli.py
@@ -0,0 +1,63 @@
+"""Command line loop that lets the user chat with the assistant."""
+
+import logging
+
+from openai import AzureOpenAI
+
+from event_handler import EventHandler
+
+
+logger = logging.getLogger(__name__)
+
+
+class Cli:
+    """Interactive prompt that forwards user input to an assistant thread."""
+
+    def __init__(self, client: AzureOpenAI, assistant_id: str):
+        """
+        :param client: Azure OpenAI client used for the API calls.
+        :param assistant_id: id of the assistant that answers the questions.
+        """
+        self.client = client
+        self.assistant_id = assistant_id
+
+    def run(self):
+        """Run the conversation until the user types `exit` or closes the input.
+
+        One thread is created up front and every message is added to it.
+        """
+        thread = self.client.beta.threads.create()
+
+        logger.info("starting conversation with assistant (assistant_id={}, thread_id={})".format(self.assistant_id, thread.id))
+
+        print('''
+I'm a law firm assistant.
+How can I help you with court cases!
+    ''')
+
+        while True:
+            try:
+                user_input = input("\nYour input: ").strip()
+            except (EOFError, KeyboardInterrupt):
+                # Ctrl-D / Ctrl-C or a closed stdin ends the session like `exit` does
+                user_input = "exit"
+
+            if user_input == "exit":
+                print("Exiting conversation with assistant")
+                break
+
+            if not user_input:
+                # nothing to ask; prompt again instead of sending a blank message
+                continue
+
+            self.client.beta.threads.messages.create(
+                thread_id=thread.id,
+                role="user",
+                content=user_input
+            )
+
+            print("\nAssistant: ", end="", flush=True)
+            event_handler = EventHandler()
+            with self.client.beta.threads.runs.stream(assistant_id=self.assistant_id, thread_id=thread.id,
+                                                      event_handler=event_handler) as stream:
+                stream.until_done()
diff --git a/gen-ai/Assistants/law_firm_assistant/event_handler.py b/gen-ai/Assistants/law_firm_assistant/event_handler.py
new file mode 100644
index 00000000..91ac80fb
--- /dev/null
+++ b/gen-ai/Assistants/law_firm_assistant/event_handler.py
@@ -0,0 +1,71 @@
+"""Streaming event handler that prints the assistant's answer and logs tool calls."""
+
+import logging
+from typing import Any
+
+from openai import AssistantEventHandler
+from openai.types.beta.threads.runs import ToolCall
+from openai.types.beta.threads import Text, TextDelta
+
+
+logger = logging.getLogger(__name__)
+
+
+class EventHandler(AssistantEventHandler):
+    """Prints streamed text to stdout and writes tool and run events to the log."""
+
+    def __init__(self):
+        super().__init__()
+        # not read anywhere in this module; kept so the attribute stays available
+        self.is_processing_annotation = False
+
+    def on_exception(self, exception: Exception) -> None:
+        """Log an exception reported by the stream."""
+        logger.error("please try again. an exception occurred: {}".format(exception))
+
+    def on_tool_call_created(self, tool_call: Any):
+        """Log the type of the tool call that was started."""
+        logger.info("started calling tool {}".format(get_tool_type(tool_call)))
+
+    def on_tool_call_done(self, tool_call: ToolCall) -> None:
+        """Log the type of the tool call that finished."""
+        logger.info("completed calling tool {}".format(get_tool_type(tool_call)))
+
+    def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None:
+        """Print the newly streamed piece of the answer without a line break."""
+        # a delta may carry no text; printing it would show the word "None"
+        if delta.value:
+            print(delta.value, end="", flush=True)
+
+    def on_text_done(self, text: Text) -> None:
+        """Print the url citations of the finished answer, if there are any."""
+        is_first_url_citation = True
+        for annotation in text.annotations:
+            if annotation.type != "url_citation":
+                continue
+            # the citation payload is read from the model's extra fields; an
+            # annotation without that payload is skipped instead of crashing
+            citation = (annotation.model_extra or {}).get('url_citation')
+            if not citation:
+                continue
+            if is_first_url_citation:
+                print("\nUrl citations: \n", end="", flush=True)
+                is_first_url_citation = False
+            title = citation.get('title', '')
+            url = citation.get('url', '')
+            print("* {} - [{}]({})\n".format(annotation.text, title, url), end="", flush=True)
+
+    def on_timeout(self) -> None:
+        """Log that a timeout was reported by the stream."""
+        logger.warning("timeout occurred. please try again")
+
+    def on_end(self) -> None:
+        """Log that the stream of the current run ended."""
+        logger.info("completed conversation with assistant")
+
+
+def get_tool_type(tool_call: Any) -> str:
+    """Return the tool type of a tool call given as a dict or as an object."""
+    if isinstance(tool_call, dict):
+        return tool_call['type']
+    return tool_call.type
diff --git a/gen-ai/Assistants/law_firm_assistant/requirements.txt b/gen-ai/Assistants/law_firm_assistant/requirements.txt
new file mode 100644
index 00000000..30bb9023
--- /dev/null
+++ b/gen-ai/Assistants/law_firm_assistant/requirements.txt
@@ -0,0 +1,2 @@
+openai==1.30.1
+python-dotenv==0.21.0