added subquery chatting
KuuCi committed Aug 22, 2023
1 parent d6aa3da commit f06c49d
Showing 6 changed files with 112 additions and 37 deletions.
23 changes: 17 additions & 6 deletions examples/end-to-end-examples/support_chatbot/app_demo.py
@@ -22,19 +22,19 @@ def parse_args() -> Namespace:
parser.add_argument(
'--max_length',
type=int,
-default=1000,
+default=6000,
required=False,
-help='The maximum size of context from LangChain')
+help='The maximum number of tokens for the model')
parser.add_argument(
'--chunk_size',
type=int,
-default=1000,
+default=800,
required=False,
help='The chunk size when splitting documents')
parser.add_argument(
'--chunk_overlap',
type=int,
-default=300,
+default=400,
required=False,
help='The overlap between chunks when splitting documents')
parser.add_argument(
@@ -55,6 +55,12 @@ def parse_args() -> Namespace:
default='https://github.com/mosaicml/composer,https://github.com/mosaicml/streaming,https://github.com/mosaicml/examples,https://github.com/mosaicml/diffusion,https://github.com/mosaicml/llm-foundry',
required=False,
help='The GitHub repository URLs to download')
+parser.add_argument(
+'--complex_chat',
+type=bool,
+default=False,
+required=False,
+help='Whether to use subquery chatting')

parsed = parser.parse_args()

@@ -70,7 +76,8 @@ def main(endpoint_url: str,
chunk_overlap: int,
retrieval_k: int,
model_k: int,
-repository_urls: list[str]) -> None:
+repository_urls: list[str],
+chat_version: bool) -> None:

retrieval_dir = os.path.join(ROOT_DIR, 'retrieval_data_demo')

@@ -106,7 +113,10 @@ def chat_wrapper(query: str) -> str:
Returns:
str: The response from chatbot"""
-return chatbot.chat(query)
+if chat_version:
+    return chatbot.sub_query_chat(query)
+else:
+    return chatbot.chat(query)

def gradio_chat():
"""Simple gradio application for querying the model"""
@@ -133,4 +143,5 @@ def gradio_chat():
retrieval_k = args.retrieval_k,
model_k = args.model_k,
repository_urls = args.repository_urls,
+chat_version = args.complex_chat
)
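A note on the new flag: argparse's type=bool converts via bool(str), so any non-empty value (including --complex_chat False) parses as True; only the default applies when the flag is omitted. A minimal sketch of a converter that parses booleans correctly (the str2bool helper is hypothetical, not part of this commit):

from argparse import ArgumentParser, ArgumentTypeError

def str2bool(value: str) -> bool:
    # bool('False') is True, so parse the common spellings explicitly
    if value.lower() in ('true', '1', 'yes'):
        return True
    if value.lower() in ('false', '0', 'no'):
        return False
    raise ArgumentTypeError(f'Expected a boolean, got {value!r}')

parser = ArgumentParser()
parser.add_argument('--complex_chat', type=str2bool, default=False,
                    help='Whether to use subquery chatting')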
33 changes: 16 additions & 17 deletions examples/end-to-end-examples/support_chatbot/app_slack.py
@@ -30,16 +30,17 @@ def parse_args() -> Namespace:
"""Parse commandline arguments."""
parser = ArgumentParser(description='Run a chatbot!')
parser.add_argument('--endpoint_url', type=str, default='https://models.hosted-on.mosaicml.hosting/mpt-30b-chat/v1/predict', required=False, help='The endpoint of our MosaicML LLM Model')
-parser.add_argument('--max_length', type=int, default=1000, required=False, help='The maximum size of context from LangChain')
-parser.add_argument('--chunk_size', type=int, default=1000, required=False, help='The chunk size when splitting documents')
-parser.add_argument('--chunk_overlap', type=int, default=300, required=False, help='The overlap between chunks when splitting documents')
+parser.add_argument('--max_length', type=int, default=1200, required=False, help='The maximum size of context from LangChain')
+parser.add_argument('--chunk_size', type=int, default=1200, required=False, help='The chunk size when splitting documents')
+parser.add_argument('--chunk_overlap', type=int, default=800, required=False, help='The overlap between chunks when splitting documents')
parser.add_argument('--retrieval_k', type=int, default=5, required=False, help='The number of chunks to retrieve as context from vector store')
parser.add_argument('--model_k', type=int, default=10, required=False, help='The number of outputs model should output')
parser.add_argument('--repository_urls', type=str, default='https://github.com/mosaicml/composer,https://github.com/mosaicml/streaming,https://github.com/mosaicml/examples,https://github.com/mosaicml/diffusion,https://github.com/mosaicml/llm-foundry', required=False, help='The GitHub repository URLs to download')
parser.add_argument('--data_collecting', type=bool, default=False, help='Whether to store successful threads')
parser.add_argument('--slack_token', type=str, help='Slack Token')
parser.add_argument('--slack_signing_secret', type=str, help='Slack Signing Secret')
parser.add_argument('--oci_data_storage', type=str, default='oci://mosaicml-internal-checkpoints/support-bot-demo/slack-data', help='Where successful threads will be stored')
+parser.add_argument('--complex_chat', type=bool, default=False, help='Whether to use subquery chatting')

parsed = parser.parse_args()
if parsed.repository_urls is not None:
@@ -103,15 +104,18 @@ def slack_events():

# Construct the message for the model
conversation = " ".join(conversation_msgs)
-if len(conversation) > 1000:
-    conversation = conversation[-1000:]
+if len(conversation) > 2000:
+    conversation = conversation[-2000:]
user_msg = f"Here is the conversation so far: {conversation} Here is the question: {question_msg}"

print(user_msg)

# Respond quickly to Slack
response = jsonify({'status': 'acknowledged'})
-chat_response = chatbot.chat(user_msg)
+if chat_version:
+    chat_response = chatbot.sub_query_chat(user_msg)
+else:
+    chat_response = chatbot.chat(user_msg)

# Post response in the same thread
post_args = {'channel': channel_id, 'text': chat_response, 'thread_ts': thread_ts}
Expand Down Expand Up @@ -245,14 +249,6 @@ def verify_slack_request(request):

return hmac.compare_digest(my_signature, signature)

-# @app.before_request
-# def before_slack_event_request():
-# """
-# Before processing a request, verify it's from Slack.
-# """
-# if not verify_slack_request(request):
-# return jsonify({'message': 'Unauthorized'}), 401

def main(endpoint_url: str,
max_length: int,
chunk_size: int,
@@ -263,7 +259,8 @@ def main(endpoint_url: str,
data_collecting: bool,
slack_token: str,
slack_signing_secret: str,
-oci_data_storage: str):
+oci_data_storage: str,
+complex_chat: bool):

if slack_token is None:
try:
@@ -277,10 +274,11 @@ def main(endpoint_url: str,
except KeyError:
raise ValueError('No slack signing secret provided. Please provide a slack signing secret or set the signing secret environment variable')

-global chatbot, client, oci_manager, read_slack, signing_secret
+global chatbot, client, oci_manager, read_slack, signing_secret, chat_version
oci_manager = OCIObjectStorageManager(oci_uri=oci_data_storage)
read_slack = data_collecting
signing_secret = slack_signing_secret
+chat_version = complex_chat

retrieval_dir = os.path.join(ROOT_DIR, 'retrieval_data_slack')

@@ -331,4 +329,5 @@ def main(endpoint_url: str,
data_collecting=args.data_collecting,
slack_token=args.slack_token,
slack_signing_secret=args.slack_signing_secret,
-oci_data_storage=args.oci_data_storage)
+oci_data_storage=args.oci_data_storage,
+complex_chat=args.complex_chat)
74 changes: 70 additions & 4 deletions examples/end-to-end-examples/support_chatbot/chatbot.py
@@ -65,6 +65,41 @@ def get_relevant_documents(self, query: str) -> list[Document]:
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant""")
+SUBQUERY_INTENT_TEMPLATE = ("""<|im_start|>system
+A conversation between a user and an LLM-based AI assistant about the codebase for MosaicML.
+Provide a helpful, short and simple answer to the question given the following context. If you do not know, just say "I
+do not know".<|im_end|>
+<|im_start|>context
+{context}<|im_end|>
+<|im_start|>user
+Why would the user ask the following question: {question}<|im_end|>
+<|im_start|>assistant""")
+PARTIAL_SUBQA_TEMPLATE = ("""<|im_start|>system
+A conversation between a user and an LLM-based AI assistant about the codebase for MosaicML.
+Given the context, the job of the assistant is to determine if the context is useful for answering the user's question.
+If so, the assistant will break the question into smaller questions that can likely be answered by a single section of
+the relevant context. If the context is not directly related to the user's question, the assistant will just break the
+question into simpler questions not related to the context that may be helpful for answering the question.<|im_end|>
+<|im_start|>context
+{{context}}<|im_end|>
+<|im_start|>user
+{{question}} {} Can this question be answered with the context given alone? If so, break the question down into at most five
+smaller questions that can likely be answered by a single section of the relevant documentation. If not, break the
+question down into at most five helpful questions.
+Please only respond with a list of smaller questions without any extra information.<|im_end|>
+<|im_start|>assistant""")
+PARTIAL_COMBINE_TEMPLATE = ("""<|im_start|>system
+A conversation between a user and an LLM-based AI assistant.
+Here are smaller questions regarding the user's question. If you don't know the answer to one of them, pretend
+that question doesn't exist:
+{}
+Provide a helpful and in-depth answer to the question given the following context.
+If you do not know, just say "I do not know".<|im_end|>
+<|im_start|>context
+{{context}}<|im_end|>
+<|im_start|>user
+{{question}}<|im_end|>
+<|im_start|>assistant""")
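The PARTIAL_* templates rely on str.format escaping: the single {} slot is filled by the first formatting pass (with the intent or sub-question text), while the doubled {{context}} and {{question}} collapse to literal {context} and {question} placeholders for the retrieval chain to fill later. A quick illustration:

partial = 'Context: {{context}} Question: {{question}} Hint: {}'
stage_one = partial.format('user is asking about checkpointing')
# stage_one == 'Context: {context} Question: {question} Hint: user is asking about checkpointing'
# A chain created from stage_one can still substitute {context} and {question}.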

EVAL_SIMPLE_DIR = os.path.join(ROOT_DIR, 'train_data/pipeline_data/composer_docstrings.jsonl')
EVAL_COMPLEX_DIR = os.path.join(ROOT_DIR, 'train_data/pipeline_data/complex_eval.jsonl')
@@ -121,6 +156,7 @@ def __init__(self,
self.saved_state = {'k': k, 'chunk_size': chunk_size, 'chunk_overlap': chunk_overlap, 'model_k': model.model_kwargs['top_k'],
'endpoint_url': model.endpoint_url}
self.chat_chain = None
+self.intent_chain = None
self.slack_path = slack_path
self.vector_store = None

@@ -358,8 +394,8 @@ def reload_chat_state(self) -> None:
self.model.endpoint_url = self.saved_state['endpoint_url']

def evaluate_simple(self,
data_path: str,
answer_question_string_template: str) -> str:
"""Evaluate the chatbot on simple retrieval dataset given a data_path and a chain
Args:
@@ -394,8 +430,8 @@ def evaluate_simple(self,
return f'Given Score: {(exact_match + 0.5*close_match)/ total} with {exact_match} exact matches and {close_match} close matches out of {total} questions.'

def evaluate_complex(self,
data_path: str,
answer_question_string_template: str) -> str:
"""Evaluate the chatbot on complex eval dataset given a data_path and a chain
Args:
@@ -419,6 +455,36 @@ def evaluate_complex(self,
save += f'Question:\n{question}\nAnswer:\n{continuation}\nResponse:\n{answer}\n\n'
return save

+def sub_query_chat(self,
+                   query: str) -> str:
+    """Chat with the chatbot using subqueries: first infer the intent behind the
+    query, then break the query into smaller sub-questions, and finally answer
+    the original query with the sub-questions folded into the prompt.
+
+    Args:
+        query (str): The query from the user
+
+    Returns:
+        str: The response from the chatbot, with intent, sub-questions and sources appended"""
+    if not self.intent_chain:
+        self.intent_chain = self.create_chain(SUBQUERY_INTENT_TEMPLATE)
+    intent_response = self.intent_chain(query)
+    intent_answer = self.clean_response(intent_response['result'].lstrip('\n'))
+
+    SUBQUERY_SUBQA_TEMPLATE = PARTIAL_SUBQA_TEMPLATE.format(intent_answer)
+    subQA_chain = self.create_chain(SUBQUERY_SUBQA_TEMPLATE)
+    subQA_response = subQA_chain(query)
+    subQA_answer = self.clean_response(subQA_response['result'].lstrip('\n'))
+
+    SUBQUERY_COMBINE_TEMPLATE = PARTIAL_COMBINE_TEMPLATE.format(subQA_answer)
+    combine_chain = self.create_chain(SUBQUERY_COMBINE_TEMPLATE)
+    combine_response = combine_chain(query)
+    combine_answer = self.clean_response(combine_response['result'].lstrip('\n'))
+    sources = ''
+    slack_deduplicate = True
+    for d in combine_response['source_documents']:
+        if d.metadata["score"] < 0.6:
+            # Report all Slack-sourced documents as a single 'slack_data' entry
+            if d.metadata["file_name"].startswith('message_from_slack'):
+                if slack_deduplicate:
+                    sources = sources + 'slack_data\n'
+                    slack_deduplicate = False
+            else:
+                sources = sources + f'{d.metadata["file_name"].replace("{slash}", "/")}\n'
+    if not sources:
+        return f'Answer: \n{combine_answer}\n\nIntent: \n{intent_answer}\n\nSub-questions: \n{subQA_answer}'
+    else:
+        return f'Answer: \n{combine_answer}\n\nIntent: \n{intent_answer}\n\nSub-questions: \n{subQA_answer}\nSources: \n{sources}'

def chat(self,
query: str) -> str:
"""Chat with the chatbot given a query
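Putting the pieces together, sub_query_chat runs three chains in sequence (intent, sub-questions, combined answer) and then assembles a deduplicated source list. A hedged usage sketch, assuming a ChatBot instance constructed as elsewhere in this example:

# chatbot is a ChatBot built with the same vector store and model as chat()
response = chatbot.sub_query_chat('How do I resume training from a Composer checkpoint?')
print(response)
# Prints an 'Answer:' section followed by 'Intent:', 'Sub-questions:', and,
# when retrieved documents score below the 0.6 threshold, a 'Sources:' list.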
@@ -22,8 +22,8 @@ integrations:
command: |
cd llm-foundry/scripts/inference
python convert_composer_to_hf.py \
-      --composer_path oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer-codebase/latest-rank0.pt.symlink \
-      --hf_output_path oci://mosaicml-internal-checkpoints/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer-codebase-hf/ \
+      --composer_path oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer_chatv2/latest-rank0.pt.symlink \
+      --hf_output_path oci://mosaicml-internal-checkpoints/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer_chatv2-hf/ \
--output_precision bf16 \
image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04 # Use the Docker image provided by MosaicML
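Once convert_composer_to_hf.py has written the HuggingFace-format checkpoint, it can be loaded with transformers; MPT checkpoints ship custom modeling code, so trust_remote_code=True is required. A sketch with an illustrative local path (not a path from this commit):

from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative local copy of the converted checkpoint from the OCI bucket above
ckpt = './mpt-30b-chat_composer_chatv2-hf'
model = AutoModelForCausalLM.from_pretrained(ckpt, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(ckpt)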
@@ -1,7 +1,7 @@
-name: mpt-30b-chat_composer_chatv2
+name: mpt-30b-PyPi_composer_chatv2

scheduling:
-resumable: false
+resumable: true
priority: low

compute:
@@ -43,13 +43,13 @@ image: mosaicml/llm-foundry:2.0.1_cu118-latest # Use the Docker image provided b
# See LLM-foundry llmfoundry/scripts/train.py to see how the parameters are used in code
parameters:
# Path to load the weights from the previous step
-load_path: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer-codebase/latest-rank0.pt.symlink
+load_path: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b_PyPi_composer/latest-rank0.pt.symlink
load_weights_only: false # Only load the weights for finetuning, discarding any other state from previous training

# Checkpoint to local filesystem or remote object store
save_interval: 2ep # How frequently to save checkpoints
save_num_checkpoints_to_keep: 1 # Important, this cleans up checkpoints saved to DISK
-save_folder: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer_chatv2/
+save_folder: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-PyPi_composer_chatv2/
save_weights_only: false
dist_timeout: 600000 # Set large dist_timeout to allow for checkpoint uploading on a slow connection

@@ -69,9 +69,8 @@ parameters:
model:
name: hf_causal_lm
pretrained: false
-pretrained_model_name_or_path: mosaicml/mpt-30b-chat
+pretrained_model_name_or_path: mosaicml/mpt-30b
init_device: mixed
-use_auth_token: true
config_overrides:
max_seq_len: ${max_seq_len}
attn_config:
@@ -80,7 +79,7 @@

# Tokenizer
tokenizer:
-name: mosaicml/mpt-30b-chat
+name: mosaicml/mpt-30b
kwargs:
model_max_length: ${max_seq_len}

@@ -143,7 +142,7 @@ parameters:
clipping_threshold: 1.0

# Run configuration
-max_duration: 4ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
+max_duration: 8ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
eval_interval: 1ep # How frequently to evaluate the model
eval_first: true # Whether to evaluate the model before training
eval_subset_num_batches: -1 # How many batches to evaluate on. -1 means evaluate on the entire dataset
Binary file not shown.
