From 58ee43a0c7f5fe68a41a7e84c9dde0cae9c9a279 Mon Sep 17 00:00:00 2001
From: charbelrami
Date: Mon, 19 Feb 2024 12:01:02 -0300
Subject: [PATCH] implement assistant

---
 .env.example                            |   1 +
 .gitignore                              |   2 +
 assistant/.gitignore                    |   1 +
 assistant/assistant.py                  | 185 ++++++++++++++++++++++++
 assistant/combine_components.sh         |  76 ++++++++++
 assistant/create_or_update_assistant.py | 161 +++++++++++++++++++++
 assistant/readme.md                     |  86 +++++++++++
 7 files changed, 512 insertions(+)
 create mode 100644 .env.example
 create mode 100644 assistant/.gitignore
 create mode 100644 assistant/assistant.py
 create mode 100755 assistant/combine_components.sh
 create mode 100644 assistant/create_or_update_assistant.py
 create mode 100644 assistant/readme.md

diff --git a/.env.example b/.env.example
new file mode 100644
index 000000000..11b2b32ad
--- /dev/null
+++ b/.env.example
@@ -0,0 +1 @@
+OPENAI_API_KEY=your_api_key_here
diff --git a/.gitignore b/.gitignore
index d2276dd27..121ef8195 100644
--- a/.gitignore
+++ b/.gitignore
@@ -244,3 +244,5 @@ documentation.json
 /log
 /buck-out
 /result
+
+.vscode/
diff --git a/assistant/.gitignore b/assistant/.gitignore
new file mode 100644
index 000000000..f7275bbbd
--- /dev/null
+++ b/assistant/.gitignore
@@ -0,0 +1 @@
+venv/
diff --git a/assistant/assistant.py b/assistant/assistant.py
new file mode 100644
index 000000000..a3cc4b23f
--- /dev/null
+++ b/assistant/assistant.py
@@ -0,0 +1,185 @@
+import os
+import asyncio
+import logging
+from dotenv import load_dotenv
+from openai import AsyncOpenAI
+
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+
+class NoHTTPRequestFilter(logging.Filter):
+    def filter(self, record):
+        return "HTTP Request:" not in record.getMessage()
+
+
+for handler in logging.root.handlers:
+    handler.addFilter(NoHTTPRequestFilter())
+
+
+load_dotenv()
+
+
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+if not OPENAI_API_KEY:
+    logging.error(
+        "The `OPENAI_API_KEY` environment variable is not set. For instructions on how to set it, refer to the README."
+    )
+    exit(1)
+
+
+client = AsyncOpenAI(api_key=OPENAI_API_KEY)
+
+
+ASSISTANT_NAME = "Cave Echo"
+
+
+async def retrieve_assistant_by_name(name):
+    try:
+        assistants = await client.beta.assistants.list()
+        for assistant in assistants.data:
+            if assistant.name == name:
+                return assistant
+        return None
+    except Exception as e:
+        logging.error(f"Failed to retrieve assistant by name: {e}")
+        return None
+
+
+async def create_thread():
+    try:
+        thread = await client.beta.threads.create()
+        return thread
+    except Exception as e:
+        logging.error(f"Failed to create thread: {e}")
+        return None
+
+
+async def create_message(thread_id, content):
+    try:
+        message = await client.beta.threads.messages.create(
+            thread_id=thread_id, role="user", content=content
+        )
+        return message
+    except Exception as e:
+        logging.error(f"Failed to create message: {e}")
+        return None
+
+
+async def create_a_run(assistant_id, thread_id):
+    try:
+        run = await client.beta.threads.runs.create(
+            assistant_id=assistant_id, thread_id=thread_id
+        )
+        return run
+    except Exception as e:
+        logging.error(f"Failed to create a run: {e}")
+        return None
+
+
+async def bat_animation():
+    """Displays a moving bat emoji in the console."""
+    frames = [
+        "🦇  ",
+        " 🦇 ",
+        "  🦇",
+        " 🦇 ",
+    ]
+    while True:
+        for frame in frames:
+            print(f"\r{frame}", end="", flush=True)
+            await asyncio.sleep(0.3)  # animation speed
+
+
+async def get_responses(thread_id, run_id):
+    animation_task = None
+    try:
+        animation_task = asyncio.create_task(bat_animation())
+
+        while True:
+            run = await client.beta.threads.runs.retrieve(
+                thread_id=thread_id, run_id=run_id
+            )
+            if run.status == "completed":
+                break
+            await asyncio.sleep(1)
+
+        if animation_task:
+            animation_task.cancel()
+            try:
+                await animation_task
+            except asyncio.CancelledError:
+                pass  # expected exception on task cancellation
+
+        print("\r", end="", flush=True)  # clear the animation from the console
+
+        messages = await client.beta.threads.messages.list(thread_id=thread_id)
+        if messages.data:
+            # the first message in the list is the latest response
+            message = messages.data[0]
+            if message.role == "assistant" and message.content:
+                print(f"{ASSISTANT_NAME}: {message.content[0].text.value}")
+
+    except Exception as e:
+        logging.error(f"Failed to get responses: {e}")
+        if animation_task:
+            animation_task.cancel()
+            try:
+                await animation_task
+            except asyncio.CancelledError:
+                pass  # again, ignore the expected cancellation error
+
+    finally:
+        # ensure the line is clear of animation after exception or completion
+        print("\r", end="")
+
+
+async def delete_thread(client, thread_id):
+    try:
+        response = await client.beta.threads.delete(thread_id)
+        logging.info(f"Thread {thread_id} deleted successfully.")
+        return response
+    except Exception as e:
+        logging.error(f"Failed to delete thread {thread_id}: {e}")
+        return None
+
+
+async def main():
+    assistant = await retrieve_assistant_by_name(ASSISTANT_NAME)
+    if assistant is None:
+        logging.info(
+            f"Assistant {ASSISTANT_NAME} not found. Aborting. For instructions on how to create an assistant, refer to the README."
+        )
+        return
+
+    logging.info(
+        f"Entering the cave! Beware of bats. Type 'exit' to see the sunlight again."
+    )
+    thread = await create_thread()
+    if thread is None:
+        logging.error("Failed to create conversation thread.")
+        return
+
+    try:
+        while True:
+            user_input = input("You: ")
+            if user_input.lower() == "exit":
+                logging.info("Emerging from the cave, back to the daylight. Goodbye!")
Goodbye!") + break + + await create_message(thread.id, user_input) + run = await create_a_run(assistant.id, thread.id) + if run is None: + logging.error("Failed to create a run.") + return + await get_responses(thread.id, run.id) + finally: + await delete_thread( + client, thread.id + ) # ensure the thread is deleted when exiting + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/assistant/combine_components.sh b/assistant/combine_components.sh new file mode 100755 index 000000000..fed1ed7f1 --- /dev/null +++ b/assistant/combine_components.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +# This script is meant to be run from `create_or_update_assistant.py`. It +# distributes the highest version files from specified directories into a set +# number of parts. + +if [ -z "$1" ]; then + echo "Please specify the number of parts." + exit 1 +fi + +num_parts=$1 +if ! [[ "$num_parts" =~ ^[0-9]+$ ]]; then + echo "The number of parts must be a positive integer." + exit 1 +fi + +output_file_base="noredinkuicomponentspart" +ignore_list=("AnimatedIcon" "AssignmentIcon" "CharacterIcon" "Logo" "MasteryIcon" "Pennant" "Sprite" "UiIcon") + +part=1 +dir_count=0 +total_dirs=$(find ../src/Nri/Ui/ -maxdepth 1 -type d | wc -l) +interval_dirs=$((total_dirs / num_parts)) + +output_file="${output_file_base}${part}.md" +# ensure the output file is empty before starting +: > "$output_file" + + +is_in_ignore_list() { + local folder=$1 + for ignore_folder in "${ignore_list[@]}"; do + if [[ "$folder" == *"$ignore_folder"* ]]; then + return 0 # true, folder is in the ignore list + fi + done + return 1 # false, folder is not in the ignore list +} + +# concatenate the highest version file from a folder to the output file +concatenate_highest_version() { + local folder=$1 + # use find to list files only, then sort and pick the highest version file + highest_version_file=$(find "$folder" -maxdepth 1 -type f | sort -V | tail -n 1) + + if [ ! -z "$highest_version_file" ] && [ -f "$highest_version_file" ]; then + # ensure the file is readable before attempting to concatenate + if [ -r "$highest_version_file" ]; then + echo -e "# $highest_version_file\n" >> "$output_file" + cat "$highest_version_file" >> "$output_file" || { + echo "Failed to read file: $highest_version_file" + return 1 + } + echo -e "\n---\n" >> "$output_file" + else + echo "Cannot read file: $highest_version_file" + fi + fi +} + + +for dir in ../src/Nri/Ui/*/ ; do + if [ -d "$dir" ] && ! is_in_ignore_list "$dir"; then + if [ "$dir_count" -ge "$interval_dirs" ] && [ "$part" -lt "$num_parts" ]; then + part=$((part + 1)) + output_file="${output_file_base}${part}.md" + : > "$output_file" # clear the new output file + interval_dirs=$((interval_dirs + total_dirs / num_parts)) + fi + concatenate_highest_version "$dir" + dir_count=$((dir_count + 1)) + fi +done + +echo "Completed. Contents of the highest version files are distributed across ${num_parts} files." 
diff --git a/assistant/create_or_update_assistant.py b/assistant/create_or_update_assistant.py
new file mode 100644
index 000000000..2ce189110
--- /dev/null
+++ b/assistant/create_or_update_assistant.py
@@ -0,0 +1,161 @@
+import asyncio
+import glob
+import logging
+import os
+import subprocess
+from dotenv import load_dotenv
+from openai import AsyncOpenAI
+
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+
+load_dotenv()
+
+
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+if not OPENAI_API_KEY:
+    logging.error(
+        "The `OPENAI_API_KEY` environment variable is not set. For instructions on how to set it, refer to the README."
+    )
+    exit(1)
+
+
+client = AsyncOpenAI(api_key=OPENAI_API_KEY)
+
+ASSISTANT_NAME = "Cave Echo"
+ASSISTANT_INSTRUCTIONS = "You are {name}, a reliable source for information about NoRedInk UI components. Use your knowledge base to answer engineers' questions about these components. Respond exclusively based on content from files uploaded into your knowledge base, ensuring all responses are accurate and relevant. Avoid inferring component APIs not provided in the files, ensuring that all guidance and information provided is directly based on the uploaded materials without speculation or extrapolation beyond the given content. If you are unable to find an answer, let the user know that you are unable to help and suggest asking a human engineer for assistance. Use a friendly tone and adhere to technical writing best practices."
+MODEL = "gpt-4-turbo-preview"
+TOOLS = [{"type": "retrieval"}]
+
+
+async def retrieve_assistant_by_name(name):
+    try:
+        assistants = await client.beta.assistants.list()
+        for assistant in assistants.data:
+            if assistant.name == name:
+                return assistant
+        return None
+    except Exception as e:
+        logging.error(f"Failed to retrieve assistant by name: {e}")
+        return None
+
+
+async def upload_file(file_path):
+    try:
+        with open(file_path, "rb") as file:
+            upload_response = await client.files.create(file=file, purpose="assistants")
+            return upload_response.id
+    except Exception as e:
+        logging.error(f"Failed to upload file {file_path}: {e}")
+        return None
+
+
+async def generate_and_upload_files():
+    script_path = "./combine_components.sh"
+    num_parts = "1"
+
+    try:
+        result = subprocess.run(
+            ["bash", script_path, num_parts], check=True, capture_output=True, text=True
+        )
+        if result.stderr:
+            logging.warning(
+                f"Issues during script execution, but continuing: {result.stderr}"
+            )
+        else:
+            logging.info("Files generated and ready for upload.")
+    except subprocess.CalledProcessError as e:
+        logging.error(f"Script execution failed: {e.stderr}")
+        return []
+
+    part_files = glob.glob("./noredinkuicomponentspart*.md")
+    if not part_files:
+        logging.error("No files were generated by the script.")
+        return []
+
+    upload_tasks = [upload_file(part_file) for part_file in part_files]
+    file_ids = await asyncio.gather(*upload_tasks, return_exceptions=True)
+
+    # clean up generated files
+    for part_file in part_files:
+        try:
+            os.remove(part_file)
+        except Exception as e:
+            logging.warning(f"Failed to delete file {part_file}: {e}")
+
+    return [file_id for file_id in file_ids if file_id is not None]
+
+
+async def list_files(assistant_id):
+    try:
+        assistant_files = await client.beta.assistants.files.list(
+            assistant_id=assistant_id
+        )
+        return assistant_files.data
+    except Exception as e:
+        logging.error(f"Failed to list files for assistant {assistant_id}: {e}")
+        return []
+
+
+async def delete_file(file_id):
+    try:
+        deleted_file = await client.files.delete(file_id)
+        logging.info(f"Deleted file {file_id} successfully.")
+        return deleted_file
+    except Exception as e:
+        logging.error(f"Failed to delete file {file_id}: {e}")
+        return None
+
+
+async def delete_old_files(assistant_id):
+    files = await list_files(assistant_id)
+    delete_tasks = [delete_file(file.id) for file in files]
+    await asyncio.gather(*delete_tasks)
+
+
+async def main():
+    assistant = await retrieve_assistant_by_name(ASSISTANT_NAME)
+    file_ids = await generate_and_upload_files()
+    if not file_ids:
+        logging.error("No files to upload. Aborting assistant creation/update.")
+        return
+
+    if assistant:
+        logging.info(f"Deleting old files for assistant: {ASSISTANT_NAME}")
+        await delete_old_files(assistant.id)
+
+        logging.info(f"Updating existing assistant: {ASSISTANT_NAME}")
+        try:
+            await client.beta.assistants.update(
+                assistant.id,
+                name=ASSISTANT_NAME,
+                instructions=ASSISTANT_INSTRUCTIONS.format(name=ASSISTANT_NAME),
+                model=MODEL,
+                tools=TOOLS,
+                file_ids=file_ids,
+            )
+            logging.info(f"{ASSISTANT_NAME} updated successfully.")
+        except Exception as e:
+            logging.error(f"Failed to update assistant: {e}")
+    else:
+        logging.info(f"Creating new assistant: {ASSISTANT_NAME}")
+        try:
+            assistant = await client.beta.assistants.create(
+                name=ASSISTANT_NAME,
+                instructions=ASSISTANT_INSTRUCTIONS.format(name=ASSISTANT_NAME),
+                model=MODEL,
+                tools=TOOLS,
+                file_ids=file_ids,
+            )
+            logging.info(
+                f"Assistant {ASSISTANT_NAME} created successfully with ID: {assistant.id}"
+            )
+        except Exception as e:
+            logging.error(f"Failed to create assistant: {e}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/assistant/readme.md b/assistant/readme.md
new file mode 100644
index 000000000..c3f3d05db
--- /dev/null
+++ b/assistant/readme.md
@@ -0,0 +1,86 @@
+# Cave Echo
+
+Cave Echo is a conversational assistant that uses the [Assistants API](https://platform.openai.com/docs/assistants/overview) to generate responses about `noredink-ui`.
+
+## Quick Start
+
+If you have already set up the assistant, you can run it with the following commands:
+
+```bash
+source venv/bin/activate
+python3 assistant.py
+```
+
+To exit the assistant, type `exit` and press Enter.
+
+To deactivate the virtual environment, run `deactivate`.
+
+## First-time Setup
+
+### Prerequisites
+
+Before you begin, ensure you have the following:
+
+- Python 3.7 or later. You can install the latest version of Python using Homebrew on macOS with `brew install python`. To check your Python version, run `python3 --version`.
+- An OpenAI API key.
+
+### Setup
+
+#### OpenAI API Key
+
+First, navigate to the `noredink-ui` root directory and create a `.env` file with your OpenAI API key:
+
+```bash
+cd path/to/noredink-ui
+echo OPENAI_API_KEY=your_api_key_here > .env
+```
+
+There's a `.env.example` file in the root directory that you can use as a template:
+
+```bash
+cp .env.example .env
+```
+
+**Important:** Ensure the `.env` file is never committed to the repository by adding it to the `.gitignore` file if it isn't already.
+
+#### Virtual Environment
+
+It's recommended to use a virtual environment to manage dependencies without causing conflicts with other Python projects.
+
+Navigate to the `assistant` directory if you haven't already:
+
+```bash
+cd path/to/noredink-ui/assistant
+```
+
+Create and activate a virtual environment:
+
+```bash
+python3 -m venv venv
+source venv/bin/activate
+```
+
+**Important:** Ensure the `venv/` directory is never committed to the repository by adding it to the `.gitignore` file if it isn't already.
+
+#### Dependencies
+
+Install the required dependencies inside the virtual environment:
+
+```bash
+pip3 install openai python-dotenv
+```
+
+To deactivate the virtual environment, run `deactivate`.
+
+### Running the Assistant
+
+- Ensure you are in the `assistant` directory
+- Activate the virtual environment if you haven't already: `source venv/bin/activate`
+- Run `python3 assistant.py`
+
+### Creating or Updating the Assistant
+
+- Ensure you are in the `assistant` directory
+- Activate the virtual environment if you haven't already: `source venv/bin/activate`
+- To make changes to the assistant's behavior, edit the `create_or_update_assistant.py` file (see the sketch below)
+- Run `python3 create_or_update_assistant.py`
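+
+If you want to change what the assistant is called, how it is instructed, or which model it uses, the constants near the top of `create_or_update_assistant.py` are the usual place to start. A minimal sketch, showing the values as they appear in this patch:
+
+```python
+# create_or_update_assistant.py: constants that control the assistant's behavior
+ASSISTANT_NAME = "Cave Echo"
+ASSISTANT_INSTRUCTIONS = "You are {name}, a reliable source for information about NoRedInk UI components. ..."  # full text lives in the file
+MODEL = "gpt-4-turbo-preview"
+TOOLS = [{"type": "retrieval"}]  # retrieval lets the assistant search the uploaded component files
+```
+
+Re-running `python3 create_or_update_assistant.py` regenerates the component files via `combine_components.sh`, uploads them, and updates the existing assistant (or creates it if it doesn't exist yet) with these settings.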