diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 986fdf4df..21e2486bb 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -27,6 +27,7 @@ jobs: - benchmarks/deeplab - benchmarks/resnet_imagenet - end-to-end-examples/sec_10k_qa + - end-to-end-examples/support_chatbot - end-to-end-examples/stable_diffusion - end-to-end-examples/stable_diffusion_dreambooth - third-party/nemo diff --git a/examples/end-to-end-examples/support_chatbot/README.md b/examples/end-to-end-examples/support_chatbot/README.md index 2ddd8e5fd..89b569ae9 100644 --- a/examples/end-to-end-examples/support_chatbot/README.md +++ b/examples/end-to-end-examples/support_chatbot/README.md @@ -105,9 +105,9 @@ python scripts/conversion/convert_jsonl_to_stream.py \ ## Step 3: Finetuning on our Repository -Next, we will finetune our pretrained base model on the train split of the our data, whether that be PyPi documentation, MosaicML code base, or dolly in order to tune it on data that is in-domain for the end task of answering questions about the mosaic codebase. This process is called "domain tuning," and can be useful for adapting a model that has already been trained on a huge amount of data (e.g. MPT-7b) to a new domain. For this example, we will use the train/validation(/test) splits provided with the dataset, which can be in a variety of different formats. We will use the validation split as validation data, and reserve the test split if avalible for our final testing of our application. +Next, we will finetune our pretrained model on the train split of our data, whether that be PyPi documentation, the MosaicML code base, or dolly, in order to tune it on data that is in-domain for the end task of answering questions about the MosaicML codebase. This process is called "domain tuning," and can be useful for adapting a model that has already been trained on a huge amount of data (e.g. MPT-7b) to a new domain. For this example, we will use the train/validation(/test) splits provided with the dataset, which can be in a variety of different formats. We will use the validation split as validation data, and reserve the test split, if available, for the final testing of our application. -Please check out the [training yaml](./mcli-yamls/03_finetune_on_10ks.yaml) for all of the details. This yaml will load the pretrained weights for `mpt-7b` available on the [HuggingFace Hub](https://huggingface.co/mosaicml/mpt-7b), and then train using the normal causal language modeling objective on our datasets that we processed in the previous step. The [training script](https://github.com/mosaicml/llm-foundry/blob/main/scripts/train/train.py) itself, is from LLM-foundry. +Please check out the [training directory](./mcli-yamls/finetune) for all of the details. These yamls will load the pretrained weights for `mpt-7b` available on the [HuggingFace Hub](https://huggingface.co/mosaicml/mpt-7b), and then train using the normal causal language modeling objective on our datasets that we processed in the previous step. The [training script](https://github.com/mosaicml/llm-foundry/blob/main/scripts/train/train.py) itself is from LLM-foundry. To run finetuning, run the following where `composer_codebase` can be replaced with `PyPi` or `dolly_hh` @@ -124,8 +124,6 @@ mcli run -f mcli_yamls/finetune/finetune_composer_codebase.yaml --cluster CLUSTE Before we can deploy our model, we need to convert it into the standard HuggingFace checkpoint folder.
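For orientation, the conversion described in the next paragraph is driven by LLM-foundry's `convert_composer_to_hf.py`. Below is a minimal sketch of a by-hand invocation; the flag names are assumptions based on typical LLM-foundry usage (verify them against the script's `--help`), and `CLOUD`, `BUCKET_NAME`, `CHECKPOINT_FOLDER_NAME`, and `HF_FOLDER_NAME` are the same placeholders this step asks you to fill in.

```bash
# Hedged sketch only -- flag names are assumed from LLM-foundry, not taken from this repo's mcli yaml.
# latest-rank0.pt is the usual name Composer gives the final checkpoint (also an assumption here).
python llm-foundry/scripts/inference/convert_composer_to_hf.py \
    --composer_path CLOUD://BUCKET_NAME/CHECKPOINT_FOLDER_NAME/latest-rank0.pt \
    --hf_output_path CLOUD://BUCKET_NAME/HF_FOLDER_NAME/ \
    --output_precision bf16
```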
We will use the [conversion script](https://github.com/mosaicml/llm-foundry/blob/main/scripts/inference/convert_composer_to_hf.py) from LLM-foundry to do this. This script will take the Composer checkpoint, and write out all the files that HuggingFace expects in a checkpoint folder. You can additionally add the `--hf_repo_for_upload` argument if you would like to upload directly to a private repo on the HuggingFace Hub (you will also need to [set the `HUGGING_FACE_HUB_TOKEN` environment variable](https://docs.mosaicml.com/projects/mcli/en/latest/resources/secrets/env.html) to do this). -Note: this conversion script is _specifically_ for MPT. If you have changed the model to a different HuggingFace model, you can use the `convert_composer_to_hf_transformers.py` script in _this_ repository instead. - **Fields to replace with your values:** `REPLACE_WITH_YOUR_CLUSTER` (in the command), `CLOUD` (in the yaml), `BUCKET_NAME` (in the yaml), `CHECKPOINT_FOLDER_NAME` (in the yaml), `HF_FOLDER_NAME` (in the yaml) **Inputs:** the final checkpoint from step 4 inside `CHECKPOINT_FOLDER_NAME` and where you want the converted checkpoint to go in `HF_FOLDER_NAME` diff --git a/examples/end-to-end-examples/support_chatbot/app_demo.py b/examples/end-to-end-examples/support_chatbot/app_demo.py index 8343f4890..ed40fb8e9 100644 --- a/examples/end-to-end-examples/support_chatbot/app_demo.py +++ b/examples/end-to-end-examples/support_chatbot/app_demo.py @@ -7,6 +7,20 @@ ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) +EVAL_7B_TEMPLATE = (f'Answer the following question as one function, class, or object. If you do not know, just say "I do not know".' + '\n{context}' + '\nQuestion: {question}') + +EVAL_30B_TEMPLATE = ("""<|im_start|>system + A conversation between a user and an LLM-based AI assistant about the codebase for the MosaicML library Composer. + Provide a helpful and simple answer given the following context to the question. 
If you do not know, just say "I + do not know".<|im_end|> + <|im_start|>context + {context}<|im_end|> + <|im_start|>user + {question}<|im_end|> + <|im_start|>assistant""") + def parse_args() -> Namespace: """Parse commandline arguments.""" parser = ArgumentParser( @@ -17,9 +31,15 @@ def parse_args() -> Namespace: '--endpoint_url', type=str, default='https://models.hosted-on.mosaicml.hosting/mpt-30b-chat/v1/predict', - #default='https://mpt-30b-composer-finetuned-q8mjj9.inf.hosted-on.mosaicml.hosting/predict', + #default='https://mpt-30b-composer-finetuned-dmhpmi.inf.hosted-on.mosaicml.hosting/predict', required=False, help='The endpoint of our MosaicML LLM Model') + parser.add_argument( + '--model_name', + type=str, + default='mpt-30b-chat', + required=False, + help='only evals offered as of now are mpt-30b-chat and mpt-7b') parser.add_argument( '--max_length', type=int, @@ -53,9 +73,25 @@ def parse_args() -> Namespace: parser.add_argument( '--repository_urls', type=str, - default='https://github.com/mosaicml/composer,https://github.com/mosaicml/streaming,https://github.com/mosaicml/examples,https://github.com/mosaicml/diffusion,https://github.com/mosaicml/llm-foundry', + nargs='*', + default=['https://github.com/mosaicml/composer', + 'https://github.com/mosaicml/streaming', + 'https://github.com/mosaicml/examples', + 'https://github.com/mosaicml/diffusion', + 'https://github.com/mosaicml/llm-foundry'], + required=False, + help='The GitHub repository URLs to download' + ) + parser.add_argument( + '--complex_data_dir', + type=str, + required=False, + help='complex eval data for human eval') + parser.add_argument( + '--simple_data_dir', + type=str, required=False, - help='The GitHub repository URLs to download') + help='simple eval data for string comparison') parser.add_argument( '--complex_chat', type=int, @@ -72,15 +108,18 @@ def parse_args() -> Namespace: return parsed def main(endpoint_url: str, + model_name: str, max_length: int, chunk_size: int, chunk_overlap: int, retrieval_k: int, model_k: int, repository_urls: list[str], + complex_data_dir: str, + simple_data_dir: str, chat_version: int) -> None: - retrieval_dir = os.path.join(ROOT_DIR, 'retrieval_data_demo') + retrieval_dir = os.path.join(ROOT_DIR, 'retrieval_data') embeddings = MosaicMLInstructorEmbeddings() llm = MosaicML( @@ -115,6 +154,22 @@ def chat_wrapper(query: str) -> str: Returns: str: The response from chatbot""" + if query == '!eval_simple': + if simple_data_dir is None: + ValueError('No simple data directory provided. Please provide a directory with simple eval data') + if model_name == 'mpt-30b-chat': + return chatbot.evaluate_simple(simple_data_dir, EVAL_30B_TEMPLATE) + elif model_name == 'mpt-7b': + return chatbot.evaluate_simple(simple_data_dir, EVAL_7B_TEMPLATE) + + elif query == '!eval_complex': + if complex_data_dir is None: + ValueError('No complex data directory provided. 
Please provide a directory with complex eval data') + if model_name == 'mpt-30b-chat': + return chatbot.evaluate_complex(complex_data_dir, EVAL_30B_TEMPLATE) + elif model_name == 'mpt-7b': + return chatbot.evaluate_complex(complex_data_dir, EVAL_7B_TEMPLATE) + if chat_version == 1: return chatbot.sub_query_chat(query) elif chat_version == 2: @@ -141,11 +196,14 @@ def gradio_chat(): args = parse_args() main( endpoint_url=args.endpoint_url, + model_name=args.model_name, max_length = args.max_length, chunk_size = args.chunk_size, chunk_overlap = args.chunk_overlap, retrieval_k = args.retrieval_k, model_k = args.model_k, repository_urls = args.repository_urls, + complex_data_dir = args.complex_data_dir, + simple_data_dir = args.simple_data_dir, chat_version = args.complex_chat ) \ No newline at end of file diff --git a/examples/end-to-end-examples/support_chatbot/app_slack.py b/examples/end-to-end-examples/support_chatbot/app_slack.py deleted file mode 100644 index 31a4122b4..000000000 --- a/examples/end-to-end-examples/support_chatbot/app_slack.py +++ /dev/null @@ -1,332 +0,0 @@ -import os -import time -import random -import string -from io import BytesIO -import hashlib -import hmac - -from argparse import ArgumentParser, Namespace -from langchain.embeddings import MosaicMLInstructorEmbeddings -from langchain.llms import MosaicML -from chatbot import ChatBot - -from slack_sdk import WebClient -from slack_sdk.errors import SlackApiError -from flask import Flask, request, jsonify -from pyngrok import ngrok - -from oci_converser import OCIObjectStorageManager - -ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) -app = Flask(__name__) - -BOT_USER_ID = "U05MKGP6J84" -processed_events = set() - -REDIRECT_URI = 'http://localhost:3000/login/callback' - -def parse_args() -> Namespace: - """Parse commandline arguments.""" - parser = ArgumentParser(description='Run a chatbot!') - parser.add_argument('--endpoint_url', type=str, default='https://models.hosted-on.mosaicml.hosting/mpt-30b-chat/v1/predict', required=False, help='The endpoint of our MosaicML LLM Model') - parser.add_argument('--max_length', type=int, default=6000, required=False, help='The maximum size of context from LangChain') - parser.add_argument('--chunk_size', type=int, default=1200, required=False, help='The chunk size when splitting documents') - parser.add_argument('--chunk_overlap', type=int, default=800, required=False, help='The overlap between chunks when splitting documents') - parser.add_argument('--retrieval_k', type=int, default=5, required=False, help='The number of chunks to retrieve as context from vector store') - parser.add_argument('--model_k', type=int, default=10, required=False, help='The number of outputs model should output') - parser.add_argument('--repository_urls', type=str, default='https://github.com/mosaicml/composer,https://github.com/mosaicml/streaming,https://github.com/mosaicml/examples,https://github.com/mosaicml/diffusion,https://github.com/mosaicml/llm-foundry', required=False, help='The GitHub repository URLs to download') - parser.add_argument('--data_collecting', type=bool, default=False, help='Where successful threads will be stored') - parser.add_argument('--slack_token', type=str, help='Slack Token') - parser.add_argument('--slack_signing_secret', type=str, help='Slack Signing Secret') - parser.add_argument('--oci_data_storage', type=str, default='oci://mosaicml-internal-checkpoints/support-bot-demo/slack-data', help='Where successful threads will be stored') - 
parser.add_argument('--complex_chat', type=bool, default=False, help='Where successful threads will be stored') - - parsed = parser.parse_args() - if parsed.repository_urls is not None: - parsed.repository_urls = ''.join(str(parsed.repository_urls).split()).split(',') - return parsed - -@app.route('/slack/events', methods=['POST']) -def slack_events(): - data = request.json - # Immediately respond to Slack's challenge - if "challenge" in data: - return jsonify({'challenge': data['challenge']}) - - # After challenge check, verify the Slack request - if not verify_slack_request(request): - return jsonify({'message': 'Unauthorized'}), 401 - - # Deduplication using event timestamp - event_ts = data['event'].get('event_ts', None) - if event_ts in processed_events: - return jsonify({'status': 'already_processed'}) - processed_events.add(event_ts) - - if 'text' in data['event'] and data['event']['type'] == 'message' and f"<@{BOT_USER_ID}>" in data['event']['text']: - channel_id = data['event']['channel'] - thread_ts = data['event'].get('thread_ts', data['event']['ts']) # Default to current message TS if not a thread - - # Fetch entire thread using Slack's API - thread_messages = client.conversations_replies(channel=channel_id, ts=thread_ts)['messages'] - - # Process thread messages - conversation_msgs = [] - question_msg = None - previous_msg = None # Store the previous message - for msg in thread_messages: - user_id = msg['user'] - user_info = client.users_info(user=user_id) - user_name = user_info['user']['name'] - formatted_msg = f"{user_name}: {msg['text']}." - - # Check if the message is just the bot ping - if msg['text'].strip() == f"<@{BOT_USER_ID}>": - if previous_msg: - question_msg = previous_msg - previous_msg = None # Reset previous_msg so it doesn't get added to conversation_msgs - continue - - # Separate the message with the ping - if f"<@{BOT_USER_ID}>" in msg['text']: - question_msg = formatted_msg - else: - conversation_msgs.append(formatted_msg) - previous_msg = formatted_msg - - # If the question_msg is still None after the loop, it means the bot was pinged without any question. - # Set the last message (if any) in the thread as the question in such cases. 
- if question_msg is None and previous_msg: - question_msg = previous_msg - conversation_msgs.remove(previous_msg) # Remove it from the context as it's now the question - - # Construct the message for the model - conversation = " ".join(conversation_msgs) - if len(conversation) > 2000: - conversation = conversation[-2000:] - user_msg = f"Here is the conversation so far: {conversation} Here is the question: {question_msg}" - - print(user_msg) - - # Respond quickly to Slack - response = jsonify({'status': 'acknowledged'}) - if chat_version: - chat_response = chatbot.sub_query_chat(user_msg) - else: - chat_response = chatbot.chat(user_msg) - - # Post response in the same thread - post_args = {'channel': channel_id, 'text': chat_response, 'thread_ts': thread_ts} - - try: - client.chat_postMessage(**post_args) - except SlackApiError as e: - print(f"Slack API Error: {e}") - except Exception as e: - print(f"Unexpected Error: {e}") - - return response - # Handling reactions added to messages - elif data['event']['type'] == 'reaction_added': - print(f"Detected reaction: {data['event']['reaction']}") - if data['event']['reaction'] == 'white_check_mark': # Checkmark reaction - channel_id = data['event']['item']['channel'] - message_ts = data['event']['item']['ts'] - - # Fetch the entire thread related to the reacted message - root_msg = get_root_message(ts=message_ts, channel=channel_id, need_thread_ts=True) - thread_messages = client.conversations_replies(channel=channel_id, ts=root_msg["thread_ts"])['messages'] - - if contains_checkmark(thread_messages): - save_thread_to_oci(thread_messages, root_msg) - - elif data['event']['type'] == 'reaction_removed': - print(f"Detected reaction removal: {data['event']['reaction']}") - if data['event']['reaction'] == 'white_check_mark': # Checkmark reaction removed - channel_id = data['event']['item']['channel'] - message_ts = data['event']['item']['ts'] - - # Fetch the entire thread related to the reacted message - root_msg = get_root_message(ts=message_ts, channel=channel_id, need_thread_ts=True) - thread_messages = client.conversations_replies(channel=channel_id, ts=root_msg["thread_ts"])['messages'] - remove_thread_from_oci(root_msg) - if contains_checkmark(thread_messages): - # If another checkmark is still present, re-save the new updated thread again - save_thread_to_oci(thread_messages, root_msg) - - return jsonify({'status': 'ok'}) - -def contains_checkmark(thread_messages): - for msg in thread_messages: - if 'reactions' in msg: - for reaction in msg['reactions']: - if reaction['name'] == 'white_check_mark': - return True - return False - -def send_slack_message(client, channel, message): - """Send a message to a Slack channel.""" - try: - client.chat_postMessage(channel=channel, text=message) - except SlackApiError as e: - print(f"Slack API Error: {e}") - -def generate_random_name(length=5): - return ''.join(random.choices(string.ascii_uppercase, k=length)) - -def save_thread_to_oci(thread_messages, root_msg): - buffer = BytesIO() - - # Dictionary to store the mapping of original user names to randomized user names - user_name_mapping = {} - - for msg in thread_messages: - user_id = msg['user'] - user_name = client.users_info(user=user_id)['user']['name'] - - # Check if the user name already has a randomized counterpart - # If not, generate one and store in the dictionary - if user_name not in user_name_mapping: - user_name_mapping[user_name] = generate_random_name() - - # Replace the original user name with its randomized version - 
randomized_user_name = user_name_mapping[user_name] - - if 'reactions' in msg: - for reaction in msg['reactions']: - if reaction['name'] == 'white_check_mark': - buffer.write(f"Accepted Answer by {randomized_user_name}: {msg['text']}\n".encode('utf-8')) - else: - buffer.write(f"(Context) {randomized_user_name}: {msg['text']}\n".encode('utf-8')) - else: - buffer.write(f"(Context) {randomized_user_name}: {msg['text']}\n".encode('utf-8')) - - # Set the pointer to the beginning of the BytesIO object - buffer.seek(0) - - # Now, upload to OCI - timestamp = root_msg['ts'] - formatted_time = time.strftime('message_from_slack_%Y%m%d%H%M%S', time.gmtime(float(timestamp))) - object_name = f'text{formatted_time}.txt' - - oci_manager.upload_file_obj(buffer, object_name) - -def remove_thread_from_oci(root_msg): - timestamp = root_msg['ts'] - formatted_time = time.strftime('%Y%m%d%H%M%S', time.gmtime(float(timestamp))) - object_name = f'text{formatted_time}.txt' - oci_manager.delete_file_obj(object_name) - -def get_root_message(ts, channel, need_thread_ts=False): - resp = client.conversations_replies(ts=ts, channel=channel) - first_msg = resp["messages"][0] - if not need_thread_ts: - return first_msg - return get_root_message(first_msg["thread_ts"], channel, need_thread_ts=False) - -def verify_slack_request(request): - """ - Verifies that the POST request comes from Slack. - """ - signature = request.headers.get("X-Slack-Signature") - timestamp = request.headers.get("X-Slack-Request-Timestamp") - - # Avoid replay attacks - if abs(time.time() - int(timestamp)) > 60 * 5: # 5 minutes - return False - - # Form the basestring as defined by Slack - basestring = f"v0:{timestamp}:{request.get_data().decode('utf-8')}" - - # Hash the basestring - my_signature = 'v0=' + hmac.new( - bytes(signing_secret, 'utf-8'), - msg=bytes(basestring, 'utf-8'), - digestmod=hashlib.sha256 - ).hexdigest() - - return hmac.compare_digest(my_signature, signature) - -def main(endpoint_url: str, - max_length: int, - chunk_size: int, - chunk_overlap: int, - retrieval_k: int, - model_k: int, - repository_urls: list[str], - data_collecting: bool, - slack_token: str, - slack_signing_secret: str, - oci_data_storage: str, - complex_chat: bool): - - if slack_token is None: - try: - slack_token = os.environ["COMPOSER_BOT_SLACK_TOKEN"] - except KeyError: - ValueError('No slack token provided. Please provide a slack token or set the SLACK_BOT_TOKEN environment variable') - - if slack_signing_secret is None: - try: - slack_signing_secret = os.environ["SLACK_SIGNING_SECRET"] - except KeyError: - ValueError('No slack signing secret provided. 
Please provide a slack signing secret or set the SLACK_BOT_TOKEN environment variable') - - global chatbot, client, oci_manager, read_slack, signing_secret, chat_version - oci_manager = OCIObjectStorageManager(oci_uri=oci_data_storage) - read_slack = data_collecting - signing_secret = slack_signing_secret - chat_version = complex_chat - - retrieval_dir = os.path.join(ROOT_DIR, 'retrieval_data_slack') - - embeddings = MosaicMLInstructorEmbeddings() - llm = MosaicML( - inject_instruction_format=True, - endpoint_url=endpoint_url, - model_kwargs={'output_len': max_length, 'top_k': model_k, 'top_p': 0.95, 'temperature': 0.1} - ) - - chatbot = ChatBot(data_path=retrieval_dir, - embedding=embeddings, - model=llm, - k=retrieval_k, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - slack_path=oci_data_storage) - - if not os.path.isfile(os.path.join(retrieval_dir, 'vectors.pickle')): - if repository_urls is None: - raise ValueError('No repository URLs provided. Please provide a comma separated list of URLs to download') - chatbot.create_vector_store(repository_urls=repository_urls) - - client = WebClient(token=slack_token) - existing_tunnels = ngrok.get_tunnels() - for tunnel in existing_tunnels: - if "http://127.0.0.1:3000" in tunnel.public_url: - public_url = tunnel.public_url - break - else: - public_url = ngrok.connect(3000) - print(" * ngrok tunnel \"{}\" -> \"http://127.0.0.1:{}/\"".format(public_url, 3000)) - - for rule in app.url_map.iter_rules(): - print(rule) - - app.run(port=3000, debug=False) - -if __name__ == "__main__": - args = parse_args() - main(endpoint_url=args.endpoint_url, - max_length=args.max_length, - chunk_size=args.chunk_size, - chunk_overlap=args.chunk_overlap, - retrieval_k=args.retrieval_k, - model_k=args.model_k, - repository_urls=args.repository_urls, - data_collecting=args.data_collecting, - slack_token=args.slack_token, - slack_signing_secret=args.slack_signing_secret, - oci_data_storage=args.oci_data_storage, - complex_chat=args.complex_chat) \ No newline at end of file diff --git a/examples/end-to-end-examples/support_chatbot/chatbot.py b/examples/end-to-end-examples/support_chatbot/chatbot.py index c799c5332..38b421096 100644 --- a/examples/end-to-end-examples/support_chatbot/chatbot.py +++ b/examples/end-to-end-examples/support_chatbot/chatbot.py @@ -2,6 +2,7 @@ import json import re import string +import time from tqdm import tqdm import langchain @@ -19,7 +20,6 @@ sys.path.append(ROOT_DIR) from repo_downloader import RepoDownloader from web_downloader import WebScraper -from oci_converser import OCIObjectStorageManager class RetrieverWithScore(BaseRetriever): """Just a custom retriever to track distance between query and retrieval docs @@ -63,7 +63,7 @@ def get_relevant_documents(self, query: str) -> list[Document]: '\n{context}' '\nQuestion: {question}') -EVAL_30B_TEMPLATE = ("""<|im_start|>system +STANDARD_30B_TEMPLATE = ("""<|im_start|>system A conversation between a user and an LLM-based AI assistant about the codebase for the MosaicML library Composer. Provide a helpful and simple answer given the following context to the question. 
If you do not know, just say "I do not know".<|im_end|> @@ -117,10 +117,6 @@ def get_relevant_documents(self, query: str) -> list[Document]: {{question}}<|im_end|> <|im_start|>assistant""") - -EVAL_SIMPLE_DIR = os.path.join(ROOT_DIR, 'train_data/pipeline_data/composer_docstrings.jsonl') -EVAL_COMPLEX_DIR = os.path.join(ROOT_DIR, 'train_data/pipeline_data/complex_eval.jsonl') - class ChatBot: """Given a folder of .txt files from data_path, create a Chatbot object that can process the files into documents, split them into managable sizes, and store them in a vector store. The Chatbot can then be used to answer questions about the documents. @@ -161,7 +157,6 @@ def __init__(self, chunk_size: int, chunk_overlap: int, k: int, - slack_path: str = False, ) -> None: self.data_path = data_path @@ -170,13 +165,11 @@ def __init__(self, self.chunk_size = chunk_size self.chunk_overlap = chunk_overlap self.k = k - self.saved_state = {'k': k, 'chunk_size': chunk_size, 'chunk_overlap': chunk_overlap, 'model_k': model.model_kwargs['top_k'], - 'endpoint_url': model.endpoint_url} + self.saved_state = {'k': k, 'chunk_size': chunk_size, 'chunk_overlap': chunk_overlap, 'model_k': model.model_kwargs['top_k']} self.chat_chain = None self.intent_chain = None self.subchain = None self.subsubchain = None - self.slack_path = slack_path self.vector_store = None if os.path.isfile(os.path.join(data_path, 'vectors.pickle')): @@ -292,14 +285,9 @@ def store_vectors(self, embedding=self.embedding ) - if self.slack_path: - with open(os.path.join(ROOT_DIR, 'retrieval_data_slack/vectors.pickle'), 'wb') as f: - pickle.dump(vector_store, f) - self.vector_store = vector_store - else: - with open(os.path.join(ROOT_DIR, 'retrieval_data_demo/vectors.pickle'), 'wb') as f: - pickle.dump(vector_store, f) - self.vector_store = vector_store + with open(os.path.join(ROOT_DIR, 'retrieval_data/vectors.pickle'), 'wb') as f: + pickle.dump(vector_store, f) + self.vector_store = vector_store def create_vector_store(self, repository_urls) -> None: """Download the repositories, load the data, split the data into chunks, and store the chunks in a vector store. @@ -314,11 +302,6 @@ def create_vector_store(self, repository_urls) -> None: if os.path.exists(downloader.clone_dir): continue downloader.download_repo() - if self.slack_path: - oci_manager = OCIObjectStorageManager(oci_uri=self.slack_path) - if not os.path.exists(os.path.join(self.data_path, 'slack_data')): - os.makedirs(os.path.join(self.data_path, 'slack_data')) - oci_manager.download_directory(os.path.join(self.data_path, 'slack_data')) pages = self.load_data() documents = self.split_pages(pages) @@ -327,7 +310,7 @@ def create_vector_store(self, repository_urls) -> None: def create_chain(self, prompt_template: str, - score_threshold: int= 0.4) -> RetrievalQA: + score_threshold: int=0.4) -> RetrievalQA: """Create a RetrievalQAWithScores given a prompt template. Args: @@ -396,21 +379,18 @@ def replace_underscore(s): return white_space_fix(remove_parentheses(remove_articles(handle_punc(lower(replace_underscore(answer)))))).strip() - def set_eval_state(self, - endpoint_url: str) -> None: + def set_eval_state(self) -> None: """Set the state of the chatbot to the evaluation state. 
This is used to change the chunk size, chunk overlap, and k""" self.chunk_overlap = 150 self.chunk_size = 750 self.k = 1 self.model.model_kwargs['output_len'] = 40 - self.model.endpoint_url = endpoint_url def reload_chat_state(self) -> None: """Reload the chatbot state to the saved state the user set when creating the chatbot""" self.chunk_overlap = self.saved_state['chunk_overlap'] self.chunk_size = self.saved_state['chunk_size'] self.k = self.saved_state['k'] - self.model.endpoint_url = self.saved_state['endpoint_url'] def evaluate_simple(self, data_path: str, @@ -424,7 +404,7 @@ def evaluate_simple(self, Returns: str: The score of the chatbot on the dataset including number of exact matches, close matches, and total questions """ - chain = self.create_chain(answer_question_string_template) + chain = self.create_chain(prompt_template=answer_question_string_template) exact_match = 0 close_match = 0 total = 1 @@ -445,6 +425,7 @@ def evaluate_simple(self, print('\n', self.normalize_str(answer), '||', self.normalize_str(continuation), '\n') print(f'{exact_match} exact matches and {close_match} close matches out of {total} questions.') total += 1 + time.sleep(0.5) return f'Given Score: {(exact_match + 0.5*close_match)/ total} with {exact_match} exact matches and {close_match} close matches out of {total} questions.' def evaluate_complex(self, @@ -459,7 +440,7 @@ def evaluate_complex(self, Returns: A long string of all questions, answers, and responses """ - chain = self.create_chain(answer_question_string_template) + chain = self.create_chain(prompt_template=answer_question_string_template) total_lines = sum(1 for _ in open(data_path)) with open(data_path, 'r') as file: save = '' @@ -478,13 +459,13 @@ def sub_query_chat(self, if not self.intent_chain: save_k = self.k self.k = 5 - self.intent_chain = self.create_chain(SUBQUERY_INTENT_TEMPLATE) + self.intent_chain = self.create_chain(prompt_template=SUBQUERY_INTENT_TEMPLATE) self.k = save_k intent_response = self.intent_chain(query) intent_answer = self.clean_response(intent_response['result'].lstrip('\n')) SUBQUERY_SUBQA_TEMPLATE = PARTIAL_SUBQA_TEMPLATE.format(intent_answer) - subQA_chain = self.create_chain(SUBQUERY_SUBQA_TEMPLATE) + subQA_chain = self.create_chain(prompt_template=SUBQUERY_SUBQA_TEMPLATE) subQA_response = subQA_chain(query) subQA_answer = self.clean_response(subQA_response['result'].lstrip('\n')) @@ -492,7 +473,7 @@ def sub_query_chat(self, sub_QA_injection = '' # Don't create a new chain on every query if not self.subchain: - self.subchain = self.create_chain(prompt_template=EVAL_30B_TEMPLATE, score_threshold=threshold) + self.subchain = self.create_chain(prompt_template=STANDARD_30B_TEMPLATE, score_threshold=threshold) for sub_QA in all_sub_QA: if sub_QA: response = self.subchain(sub_QA) @@ -503,18 +484,13 @@ def sub_query_chat(self, if sub_QA_injection: SUBQUERY_COMBINE_TEMPLATE = PARTIAL_COMBINE_TEMPLATE.format(str(sub_QA_injection).replace("{", "{{").replace("}", "}}")) - combine_chain = self.create_chain(f'{SUBQUERY_COMBINE_TEMPLATE}') + combine_chain = self.create_chain(prompt_template=SUBQUERY_COMBINE_TEMPLATE) combine_response = combine_chain(query) combine_answer = self.clean_response(combine_response['result'].lstrip('\n')) combine_answer_sources = '' - slack_deduplicate = True for d in combine_response['source_documents']: if d.metadata["score"] > 0.6: - if 'message_from_slack' == combine_answer_sources[:18] and slack_deduplicate: - combine_answer_sources = combine_answer_sources + 'slack_data\n' - 
slack_deduplicate = False - else: - combine_answer_sources = combine_answer_sources + f'{d.metadata["file_name"].replace("{slash}", "/")}\n' + combine_answer_sources = combine_answer_sources + f'{d.metadata["file_name"].replace("{slash}", "/")}\n' if not combine_answer_sources: return f'Answer: \n{str(combine_answer)}\n\nIntent: \n{str(intent_answer)}\n\n Sub-questions: \n{str(sub_QA_injection)}' @@ -530,13 +506,13 @@ def relation_sub_query_chat(self, if not self.intent_chain: save_k = self.k self.k = 3 - self.intent_chain = self.create_chain(SUBQUERY_INTENT_TEMPLATE) + self.intent_chain = self.create_chain(prompt_template=SUBQUERY_INTENT_TEMPLATE) self.k = save_k intent_response = self.intent_chain(query) intent_answer = self.clean_response(intent_response['result'].lstrip('\n')) SUBQUERY_SUBQA_TEMPLATE = PARTIAL_SUBQA_TEMPLATE.format(intent_answer) - subQA_chain = self.create_chain(SUBQUERY_SUBQA_TEMPLATE) + subQA_chain = self.create_chain(prompt_template=SUBQUERY_SUBQA_TEMPLATE) subQA_response = subQA_chain(query) subQA_answer = self.clean_response(subQA_response['result'].lstrip('\n')) @@ -553,25 +529,20 @@ def relation_sub_query_chat(self, answerable = self.clean_response(self.subsubchain(sub_QA)['result'].lstrip('\n')) if "Yes" in answerable: if not self.subchain: - self.subchain = self.create_chain(EVAL_30B_TEMPLATE) + self.subchain = self.create_chain(prompt_template=STANDARD_30B_TEMPLATE) response = self.subchain(sub_QA) answer = self.clean_response(response['result'].lstrip('\n')) sub_QA_injection += f'Question: {sub_QA} \nAnswer: {answer}\n' if sub_QA_injection: SUBQUERY_COMBINE_TEMPLATE = PARTIAL_COMBINE_TEMPLATE.format(str(sub_QA_injection).replace("{", "{{").replace("}", "}}")) - combine_chain = self.create_chain(SUBQUERY_COMBINE_TEMPLATE) + combine_chain = self.create_chain(prompt_template=SUBQUERY_COMBINE_TEMPLATE) combine_response = combine_chain(query) combine_answer = self.clean_response(combine_response['result'].lstrip('\n')) sources = '' - slack_deduplicate = True for d in combine_response['source_documents']: if d.metadata["score"] > threshold: - if 'message_from_slack' == sources[:18] and slack_deduplicate: - sources = sources + 'slack_data\n' - slack_deduplicate = False - else: - sources = sources + f'{d.metadata["file_name"].replace("{slash}", "/")}\n' + sources = sources + f'{d.metadata["file_name"].replace("{slash}", "/")}\n' if not sources: return f'Answer: \n{str(combine_answer)}\n\nIntent: \n{str(intent_answer)}\n\n Sub-questions: \n{str(sub_QA_injection)}' else: @@ -587,40 +558,16 @@ def chat(self, query (str): The query to ask the chatbot """ - if query == "!eval_7b": - self.set_eval_state(endpoint_url='https://chatbot-7b-finetuned-rxyfc5.inf.hosted-on.mosaicml.hosting/predict') - score = self.evaluate_simple(EVAL_SIMPLE_DIR, EVAL_7B_TEMPLATE) - self.reload_chat_state() - print(score) - return score - elif query == "!eval_7b_complex": - self.model.endpoint_url = 'https://chatbot-7b-finetuned-rxyfc5.inf.hosted-on.mosaicml.hosting/predict' - out = self.evaluate_complex(EVAL_COMPLEX_DIR, EVAL_7B_TEMPLATE) - self.model.endpoint_url = self.saved_state['endpoint_url'] - return out - elif query == "!eval_30b": - score = self.evaluate_simple(EVAL_SIMPLE_DIR, EVAL_30B_TEMPLATE) - print(score) - return score - elif query == "!eval_30b_complex": - out = self.evaluate_complex(EVAL_30B_TEMPLATE, EVAL_30B_TEMPLATE) - return out + # Don't create a new chain on every query + if not self.chat_chain: + self.chat_chain = 
self.create_chain(prompt_template=STANDARD_30B_TEMPLATE, score_threshold=0) + response = self.chat_chain(query) + answer = self.clean_response(response['result'].lstrip('\n')) + sources = '' + for d in response['source_documents']: + if d.metadata["score"] > 0.6: + sources = sources + f'{d.metadata["file_name"].replace("{slash}", "/")}\n' + if not sources: + return f"Answer: \n{answer}" else: - # Don't create a new chain on every query - if not self.chat_chain: - self.chat_chain = self.create_chain(prompt_template=EVAL_30B_TEMPLATE, score_threshold=0) - response = self.chat_chain(query) - answer = self.clean_response(response['result'].lstrip('\n')) - sources = '' - slack_deduplicate = True - for d in response['source_documents']: - if d.metadata["score"] > 0.6: - if 'message_from_slack' == sources[:18] and slack_deduplicate: - sources = sources + 'slack_data\n' - slack_deduplicate = False - else: - sources = sources + f'{d.metadata["file_name"].replace("{slash}", "/")}\n' - if not sources: - return f"Answer: \n{answer}" - else: - return f"Answer: \n{answer} \nSources: \n{sources}" \ No newline at end of file + return f"Answer: \n{answer} \nSources: \n{sources}" \ No newline at end of file diff --git a/examples/end-to-end-examples/support_chatbot/mcli_yamls/deploy_llm.yaml b/examples/end-to-end-examples/support_chatbot/mcli_yamls/deploy_llm.yaml index d3d36ece2..48e0dfdb5 100644 --- a/examples/end-to-end-examples/support_chatbot/mcli_yamls/deploy_llm.yaml +++ b/examples/end-to-end-examples/support_chatbot/mcli_yamls/deploy_llm.yaml @@ -25,16 +25,16 @@ command: | export PYTHONPATH=$PYTHONPATH:/code/llm-foundry:/code/examples:/code pip uninstall packaging -y rm /usr/lib/python3/dist-packages/packaging-23.1.dist-info/REQUESTED - pip install composer[streaming,libcloud,oci]==0.16.0 + pip install composer[streaming,libcloud,oci]==0.14.1 pip install packaging==23.1 model: backend: faster_transformers downloader: examples.end-to-end-examples.support_chatbot.scripts.deployment_download_helper.download_and_convert download_parameters: - remote_uri: CLOUD://BUCKET_NAME/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer-codebase-hf/ + remote_uri: oci://mosaicml-internal-checkpoints/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer-codebase-hf/ gpus: 4 - model_handler: examples.end-to-end-examples.support_chatbot.custom_mpt_ft_handler.MPTFTModelHandler # Use the provided MPT handler + model_handler: examples.inference-deployments.mpt.mpt_ft_handler.MPTFTModelHandler # Use the provided MPT handler model_parameters: ft_lib_path: /code/FasterTransformer/build/lib/libth_transformer.so # FT checkpoint path is hardcoded in MPTFTModelHandler at /tmp/mpt diff --git a/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_30b_chat.yaml b/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_30b_chat.yaml index fe8b50a97..eb10d20f1 100644 --- a/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_30b_chat.yaml +++ b/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_30b_chat.yaml @@ -73,12 +73,6 @@ parameters: attn_impl: triton attn_uses_sequence_id: false - # Tokenizer - tokenizer: - name: mosaicml/mpt-30b - kwargs: - model_max_length: ${max_seq_len} - train_loader: name: finetuning dataset: diff --git a/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_PyPi.yaml b/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_PyPi.yaml index cf59adcb0..c774d07b9 100644 --- 
a/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_PyPi.yaml +++ b/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_PyPi.yaml @@ -69,7 +69,7 @@ parameters: # Tokenizer # This section is used by LLM-foundry to construct the tokenizer tokenizer: - name: mosaicml/mpt-30b # This can be changed along with the model + name: mosaicml/mpt-30b-chat # This can be changed along with the model kwargs: model_max_length: ${max_seq_len} pad_token: <|endoftext|> diff --git a/examples/end-to-end-examples/support_chatbot/oci_converser.py b/examples/end-to-end-examples/support_chatbot/oci_converser.py deleted file mode 100644 index 11ea8784e..000000000 --- a/examples/end-to-end-examples/support_chatbot/oci_converser.py +++ /dev/null @@ -1,94 +0,0 @@ -import oci -import os - -class OCIObjectStorageManager: - def __init__(self, - oci_uri: str, - config_path="~/.oci/config", - profile_name="DEFAULT"): - self.config = oci.config.from_file(config_path, profile_name) - self.object_storage_client = oci.object_storage.ObjectStorageClient(self.config) - self.bucket_name, self.path = self.parse_oci_uri(oci_uri) - self.namespace = self._get_namespace() - - def parse_oci_uri(self, oci_uri): - """ - Parse an OCI URI into its components. - - Args: - - oci_uri (str): The OCI URI to parse. - - Returns: - - tuple: A tuple containing the bucket name and object path. - """ - if not oci_uri.startswith("oci://"): - raise ValueError(f"Invalid OCI URI: {oci_uri}") - - # Remove the "oci://" prefix - stripped = oci_uri[6:] - - # Split into bucket name and object path - parts = stripped.split("/", 1) - bucket_name = parts[0] - - object_path = None - if len(parts) > 1: - object_path = parts[1] - - return bucket_name, object_path - - def _get_namespace(self): - """Fetch the Object Storage namespace.""" - return self.object_storage_client.get_namespace().data - - def _get_oci_object_name(self, filename): - """Get the object name with oci_path.""" - return os.path.join(self.path, filename) - - def upload_file_obj(self, file_obj, object_name): - """Upload a BytesIO object to an OCI bucket.""" - object_name = self._get_oci_object_name(object_name) - print(f"Uploading {object_name}...") - self.object_storage_client.put_object( - self.namespace, - self.bucket_name, - object_name, - file_obj - ) - print(f"Uploaded {object_name} to {self.bucket_name}/{object_name}.") - - def upload_directory(self, local_path): - """Upload all files from a local directory to an OCI bucket.""" - for filename in os.listdir(local_path): - filepath = os.path.join(local_path, filename) - with open(filepath, 'rb') as file_obj: - self.upload_file_obj(file_obj, filename, self.bucket_name) - - def download_directory(self, local_path): - """Download all files from an OCI bucket to a local directory.""" - prefix = self.path - objects = self.object_storage_client.list_objects(self.namespace, self.bucket_name, prefix=prefix) - - for obj in objects.data.objects: - self.download_file(local_path, obj.name) - - def download_file(self, local_path, object_name): - """Download a single file from an OCI bucket to a local directory.""" - print(f"Downloading {object_name}...") - - response = self.object_storage_client.get_object(self.namespace, self.bucket_name, object_name) - local_filename = os.path.basename(object_name) - with open(os.path.join(local_path, local_filename), 'wb') as file: - for chunk in response.data.raw.stream(1024 * 1024, decode_content=False): - file.write(chunk) - - print(f"Downloaded {object_name} to 
{local_path}.") - -if __name__ == '__main__': - # Edit these values accordingly - local_path = "/path/to/local/directory" # Path to local directory - oci_path = "YOUR_OCI_PATH" # Folder or prefix in OCI - - manager = OCIObjectStorageManager(oci_path) - manager.upload_directory(local_path) - manager.download_directory(local_path) \ No newline at end of file diff --git a/examples/end-to-end-examples/support_chatbot/repo_downloader.py b/examples/end-to-end-examples/support_chatbot/repo_downloader.py index 771eb02ed..eb42c3a4b 100644 --- a/examples/end-to-end-examples/support_chatbot/repo_downloader.py +++ b/examples/end-to-end-examples/support_chatbot/repo_downloader.py @@ -117,12 +117,8 @@ def download_repo(self) -> str: if file.endswith(('.yaml', '.py', '.md')): full_file_path = os.path.join(root, file) _, ext = os.path.splitext(full_file_path) - if ext == '.yaml': - self.yaml_to_txt(full_file_path) - elif ext == '.py': - self.py_to_txt(full_file_path) - elif ext == '.md': - self.md_to_txt(full_file_path) + if ext == '.yaml' or ext == '.py' or ext == '.md': + self.file_to_txt(full_file_path) else: print(f'Unsupported file type: {ext}') diff --git a/examples/end-to-end-examples/support_chatbot/retrieval_data_demo/vectors.pickle b/examples/end-to-end-examples/support_chatbot/retrieval_data_demo/vectors.pickle deleted file mode 100644 index 7d8928b51..000000000 Binary files a/examples/end-to-end-examples/support_chatbot/retrieval_data_demo/vectors.pickle and /dev/null differ