diff --git a/examples/end-to-end-examples/support_chatbot/app_demo.py b/examples/end-to-end-examples/support_chatbot/app_demo.py
index 5b195b2ce..d8114076e 100644
--- a/examples/end-to-end-examples/support_chatbot/app_demo.py
+++ b/examples/end-to-end-examples/support_chatbot/app_demo.py
@@ -22,19 +22,19 @@ def parse_args() -> Namespace:
     parser.add_argument(
         '--max_length',
         type=int,
-        default=1000,
+        default=6000,
         required=False,
-        help='The maximum size of context from LangChain')
+        help='The maximum number of tokens for the model')
     parser.add_argument(
         '--chunk_size',
         type=int,
-        default=1000,
+        default=800,
         required=False,
         help='The chunk size when splitting documents')
     parser.add_argument(
         '--chunk_overlap',
         type=int,
-        default=300,
+        default=400,
         required=False,
         help='The overlap between chunks when splitting documents')
     parser.add_argument(
@@ -55,6 +55,12 @@ def parse_args() -> Namespace:
         default='https://github.com/mosaicml/composer,https://github.com/mosaicml/streaming,https://github.com/mosaicml/examples,https://github.com/mosaicml/diffusion,https://github.com/mosaicml/llm-foundry',
         required=False,
         help='The GitHub repository URLs to download')
+    parser.add_argument(
+        '--complex_chat',
+        type=bool,
+        default=False,
+        required=False,
+        help='Whether to use subquery chatting')

     parsed = parser.parse_args()

@@ -70,7 +76,8 @@ def main(endpoint_url: str,
          chunk_overlap: int,
          retrieval_k: int,
          model_k: int,
-         repository_urls: list[str]) -> None:
+         repository_urls: list[str],
+         chat_version: bool) -> None:

     retrieval_dir = os.path.join(ROOT_DIR, 'retrieval_data_demo')

@@ -106,7 +113,10 @@ def chat_wrapper(query: str) -> str:

         Returns:
             str: The response from chatbot"""
-        return chatbot.chat(query)
+        if chat_version:
+            return chatbot.sub_query_chat(query)
+        else:
+            return chatbot.chat(query)

     def gradio_chat():
         """Simple gradio application for querying the model"""
@@ -133,4 +143,5 @@ def gradio_chat():
          retrieval_k = args.retrieval_k,
          model_k = args.model_k,
          repository_urls = args.repository_urls,
+         chat_version = args.complex_chat
          )
\ No newline at end of file
diff --git a/examples/end-to-end-examples/support_chatbot/app_slack.py b/examples/end-to-end-examples/support_chatbot/app_slack.py
index a73baf1d5..8357b8748 100644
--- a/examples/end-to-end-examples/support_chatbot/app_slack.py
+++ b/examples/end-to-end-examples/support_chatbot/app_slack.py
@@ -30,9 +30,9 @@ def parse_args() -> Namespace:
     """Parse commandline arguments."""
     parser = ArgumentParser(description='Run a chatbot!')
     parser.add_argument('--endpoint_url', type=str, default='https://models.hosted-on.mosaicml.hosting/mpt-30b-chat/v1/predict', required=False, help='The endpoint of our MosaicML LLM Model')
-    parser.add_argument('--max_length', type=int, default=1000, required=False, help='The maximum size of context from LangChain')
-    parser.add_argument('--chunk_size', type=int, default=1000, required=False, help='The chunk size when splitting documents')
-    parser.add_argument('--chunk_overlap', type=int, default=300, required=False, help='The overlap between chunks when splitting documents')
+    parser.add_argument('--max_length', type=int, default=1200, required=False, help='The maximum size of context from LangChain')
+    parser.add_argument('--chunk_size', type=int, default=1200, required=False, help='The chunk size when splitting documents')
+    parser.add_argument('--chunk_overlap', type=int, default=800, required=False, help='The overlap between chunks when splitting documents')
     parser.add_argument('--retrieval_k', type=int, default=5, required=False, help='The number of chunks to retrieve as context from vector store')
     parser.add_argument('--model_k', type=int, default=10, required=False, help='The number of outputs model should output')
     parser.add_argument('--repository_urls', type=str, default='https://github.com/mosaicml/composer,https://github.com/mosaicml/streaming,https://github.com/mosaicml/examples,https://github.com/mosaicml/diffusion,https://github.com/mosaicml/llm-foundry', required=False, help='The GitHub repository URLs to download')
@@ -40,6 +40,7 @@ def parse_args() -> Namespace:
     parser.add_argument('--slack_token', type=str, help='Slack Token')
     parser.add_argument('--slack_signing_secret', type=str, help='Slack Signing Secret')
     parser.add_argument('--oci_data_storage', type=str, default='oci://mosaicml-internal-checkpoints/support-bot-demo/slack-data', help='Where successful threads will be stored')
+    parser.add_argument('--complex_chat', type=bool, default=False, help='Whether to use subquery chatting')

     parsed = parser.parse_args()
     if parsed.repository_urls is not None:
@@ -103,15 +104,18 @@ def slack_events():

         # Construct the message for the model
         conversation = " ".join(conversation_msgs)
-        if len(conversation) > 1000:
-            conversation = conversation[-1000:]
+        if len(conversation) > 2000:
+            conversation = conversation[-2000:]
         user_msg = f"Here is the conversation so far: {conversation} Here is the question: {question_msg}"
         print(user_msg)

         # Respond quickly to Slack
         response = jsonify({'status': 'acknowledged'})

-        chat_response = chatbot.chat(user_msg)
+        if chat_version:
+            chat_response = chatbot.sub_query_chat(user_msg)
+        else:
+            chat_response = chatbot.chat(user_msg)

         # Post response in the same thread
         post_args = {'channel': channel_id, 'text': chat_response, 'thread_ts': thread_ts}
@@ -245,14 +249,6 @@
     return hmac.compare_digest(my_signature, signature)

-# @app.before_request
-# def before_slack_event_request():
-#     """
-#     Before processing a request, verify it's from Slack.
-#     """
-#     if not verify_slack_request(request):
-#         return jsonify({'message': 'Unauthorized'}), 401
-
 def main(endpoint_url: str,
          max_length: int,
          chunk_size: int,
@@ -263,7 +259,8 @@ def main(endpoint_url: str,
          data_collecting: bool,
          slack_token: str,
          slack_signing_secret: str,
-         oci_data_storage: str):
+         oci_data_storage: str,
+         complex_chat: bool):

     if slack_token is None:
         try:
@@ -277,10 +274,11 @@ def main(endpoint_url: str,
         except KeyError:
            ValueError('No slack signing secret provided. Please provide a slack signing secret or set the SLACK_BOT_TOKEN environment variable')

-    global chatbot, client, oci_manager, read_slack, signing_secret
+    global chatbot, client, oci_manager, read_slack, signing_secret, chat_version
     oci_manager = OCIObjectStorageManager(oci_uri=oci_data_storage)
     read_slack = data_collecting
     signing_secret = slack_signing_secret
+    chat_version = complex_chat

     retrieval_dir = os.path.join(ROOT_DIR, 'retrieval_data_slack')

@@ -331,4 +329,5 @@ def main(endpoint_url: str,
          data_collecting=args.data_collecting,
          slack_token=args.slack_token,
          slack_signing_secret=args.slack_signing_secret,
-         oci_data_storage=args.oci_data_storage)
\ No newline at end of file
+         oci_data_storage=args.oci_data_storage,
+         complex_chat=args.complex_chat)
\ No newline at end of file
diff --git a/examples/end-to-end-examples/support_chatbot/chatbot.py b/examples/end-to-end-examples/support_chatbot/chatbot.py
index 98306201d..b4de334a6 100644
--- a/examples/end-to-end-examples/support_chatbot/chatbot.py
+++ b/examples/end-to-end-examples/support_chatbot/chatbot.py
@@ -65,6 +65,41 @@ def get_relevant_documents(self, query: str) -> list[Document]:
     <|im_start|>user
     {question}<|im_end|>
     <|im_start|>assistant""")
+SUBQUERY_INTENT_TEMPLATE = ("""<|im_start|>system
+    A conversation between a user and an LLM-based AI assistant about the codebase for MosaicML.
+    Provide a helpful, short and simple answer given the following context to the question. If you do not know, just say "I
+    do not know".<|im_end|>
+    <|im_start|>context
+    {context}<|im_end|>
+    <|im_start|>user
+    Why would the user ask the following question: {question}<|im_end|>
+    <|im_start|>assistant""")
+PARTIAL_SUBQA_TEMPLATE = ("""<|im_start|>system
+    A conversation between a user and an LLM-based AI assistant about the codebase for MosaicML.
+    Given the context, the job of the assistant is to determine if the context is useful for answering the user's question.
+    If so, the assistant will break the question into smaller questions that can likely be answered by a single section of
+    the relevant context. If the context is not directly related to the user's question, the assistant will just break the
+    question into simpler questions not related to the context that may be helpful for answering the question.<|im_end|>
+    <|im_start|>context
+    {{context}}<|im_end|>
+    <|im_start|>user
+    {{question}} {} Can this question be answered with the context given alone? If so, break the question down into at most five
+    smaller questions that can likely be answered by a single section of the relevant documentation. If not, break the
+    question down into at most five helpful questions.
+    Please only respond with a list of smaller questions without any extra information.<|im_end|>
+    <|im_start|>assistant""")
+PARTIAL_COMBINE_TEMPLATE = ("""<|im_start|>system
+    A conversation between a user and an LLM-based AI assistant.
+    Here are smaller questions regarding the user's question. If you don't know the answer, pretend like
+    the question doesn't exist:
+    {}
+    Provide a helpful and in-depth answer given the following context to the question.
+    If you do not know, just say "I do not know".<|im_end|>
+    <|im_start|>context
+    {{context}}<|im_end|>
+    <|im_start|>user
+    {{question}}<|im_end|>
+    <|im_start|>assistant""")

 EVAL_SIMPLE_DIR = os.path.join(ROOT_DIR, 'train_data/pipeline_data/composer_docstrings.jsonl')
 EVAL_COMPLEX_DIR = os.path.join(ROOT_DIR, 'train_data/pipeline_data/complex_eval.jsonl')
@@ -121,6 +156,7 @@ def __init__(self,

         self.saved_state = {'k': k, 'chunk_size': chunk_size, 'chunk_overlap': chunk_overlap, 'model_k': model.model_kwargs['top_k'], 'endpoint_url': model.endpoint_url}
         self.chat_chain = None
+        self.intent_chain = None
         self.slack_path = slack_path
         self.vector_store = None

@@ -358,8 +394,8 @@ def reload_chat_state(self) -> None:
         self.model.endpoint_url = self.saved_state['endpoint_url']

     def evaluate_simple(self,
-                          data_path: str,
-                          answer_question_string_template: str) -> str:
+                        data_path: str,
+                        answer_question_string_template: str) -> str:
         """Evaluate the chatbot on simple retrieval dataset given a data_path and a chain

         Args:
@@ -394,8 +430,8 @@ def evaluate_simple(self,
         return f'Given Score: {(exact_match + 0.5*close_match)/ total} with {exact_match} exact matches and {close_match} close matches out of {total} questions.'

     def evaluate_complex(self,
-                           data_path: str,
-                           answer_question_string_template: str) -> str:
+                         data_path: str,
+                         answer_question_string_template: str) -> str:
         """Evaluate the chatbot on complex eval dataset given a data_path and a chain

         Args:
@@ -419,6 +455,36 @@ def evaluate_complex(self,
             save += f'Question:\n{question}\nAnswer:\n{continuation}\nResponse:\n{answer}\n\n'
         return save

+    def sub_query_chat(self,
+                       query: str) -> str:
+        if not self.intent_chain:
+            self.intent_chain = self.create_chain(SUBQUERY_INTENT_TEMPLATE)
+        intent_response = self.intent_chain(query)
+        intent_answer = self.clean_response(intent_response['result'].lstrip('\n'))
+
+        SUBQUERY_SUBQA_TEMPLATE = PARTIAL_SUBQA_TEMPLATE.format(intent_answer)
+        subQA_chain = self.create_chain(SUBQUERY_SUBQA_TEMPLATE)
+        subQA_response = subQA_chain(query)
+        subQA_answer = self.clean_response(subQA_response['result'].lstrip('\n'))
+
+        SUBQUERY_COMBINE_TEMPLATE = PARTIAL_COMBINE_TEMPLATE.format(subQA_answer)
+        combine_chain = self.create_chain(SUBQUERY_COMBINE_TEMPLATE)
+        combine_response = combine_chain(query)
+        combine_answer = self.clean_response(combine_response['result'].lstrip('\n'))
+        sources = ''
+        slack_deduplicate = True
+        for d in combine_response['source_documents']:
+            if d.metadata["score"] < 0.6:
+                if 'message_from_slack' == sources[:18] and slack_deduplicate:
+                    sources = sources + 'slack_data\n'
+                    slack_deduplicate = False
+                else:
+                    sources = sources + f'{d.metadata["file_name"].replace("{slash}", "/")}\n'
+        if not sources:
+            return f'Answer: \n{combine_answer}\n\nIntent: \n{intent_answer}\n\nSub-questions: \n{subQA_answer}'
+        else:
+            return f'Answer: \n{combine_answer}\n\nIntent: \n{intent_answer}\n\nSub-questions: \n{subQA_answer}\nSources: \n{sources}'
+
     def chat(self, query: str) -> str:
         """Chat with the chatbot given a query

diff --git a/examples/end-to-end-examples/support_chatbot/mcli_yamls/convert_checkpoint_to_huggingface.yaml b/examples/end-to-end-examples/support_chatbot/mcli_yamls/convert_checkpoint_to_huggingface.yaml
index e83b52580..b9ed9b7a5 100644
--- a/examples/end-to-end-examples/support_chatbot/mcli_yamls/convert_checkpoint_to_huggingface.yaml
+++ b/examples/end-to-end-examples/support_chatbot/mcli_yamls/convert_checkpoint_to_huggingface.yaml
@@ -22,8 +22,8 @@ integrations:
 command: |
   cd llm-foundry/scripts/inference
   python convert_composer_to_hf.py \
-    --composer_path oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer-codebase/latest-rank0.pt.symlink \
-    --hf_output_path oci://mosaicml-internal-checkpoints/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer-codebase-hf/ \
+    --composer_path oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer_chatv2/latest-rank0.pt.symlink \
+    --hf_output_path oci://mosaicml-internal-checkpoints/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer_chatv2-hf/ \
     --output_precision bf16 \

 image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04 # Use the Docker image provided by MosaicML
\ No newline at end of file
diff --git a/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_30b_chat.yaml b/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_30b_chat.yaml
index 6032b7ea9..b8e20f799 100644
--- a/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_30b_chat.yaml
+++ b/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_30b_chat.yaml
@@ -1,7 +1,7 @@
-name: mpt-30b-chat_composer_chatv2
+name: mpt-30b-PyPi_composer_chatv2

 scheduling:
-  resumable: false
+  resumable: true
   priority: low

 compute:
@@ -43,13 +43,13 @@ image: mosaicml/llm-foundry:2.0.1_cu118-latest # Use the Docker image provided b
 # See LLM-foundry llmfoundry/scripts/train.py to see how the parameters are used in code
 parameters:
   # Path to load the weights from the previous step
-  load_path: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer-codebase/latest-rank0.pt.symlink
+  load_path: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b_PyPi_composer/latest-rank0.pt.symlink
   load_weights_only: false # Only load the weights for finetuning, discarding any other state from previous training

   # Checkpoint to local filesystem or remote object store
   save_interval: 2ep # How frequently to save checkpoints
   save_num_checkpoints_to_keep: 1 # Important, this cleans up checkpoints saved to DISK
-  save_folder: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer_chatv2/
+  save_folder: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-PyPi_composer_chatv2/
   save_weights_only: false

   dist_timeout: 600000 # Set large dist_timeout to allow for checkpoint uploading on a slow connection
@@ -69,9 +69,8 @@ parameters:
   model:
     name: hf_causal_lm
     pretrained: false
-    pretrained_model_name_or_path: mosaicml/mpt-30b-chat
+    pretrained_model_name_or_path: mosaicml/mpt-30b
     init_device: mixed
-    use_auth_token: true
     config_overrides:
       max_seq_len: ${max_seq_len}
       attn_config:
@@ -80,7 +79,7 @@ parameters:

   # Tokenizer
   tokenizer:
-    name: mosaicml/mpt-30b-chat
+    name: mosaicml/mpt-30b
     kwargs:
       model_max_length: ${max_seq_len}

@@ -143,7 +142,7 @@ parameters:
       clipping_threshold: 1.0

   # Run configuration
-  max_duration: 4ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
+  max_duration: 8ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
   eval_interval: 1ep # How frequently to evaluate the model
   eval_first: true # Whether to evaluate the model before training
   eval_subset_num_batches: -1 # How many batches to evaluate on. -1 means evaluate on the entire dataset
diff --git a/examples/end-to-end-examples/support_chatbot/retrieval_data_demo/vectors.pickle b/examples/end-to-end-examples/support_chatbot/retrieval_data_demo/vectors.pickle
index cb6ec721a..7d8928b51 100644
Binary files a/examples/end-to-end-examples/support_chatbot/retrieval_data_demo/vectors.pickle and b/examples/end-to-end-examples/support_chatbot/retrieval_data_demo/vectors.pickle differ