added subquery chatting
KuuCi committed Aug 22, 2023
1 parent d6aa3da commit f06c49d
Showing 6 changed files with 112 additions and 37 deletions.
23 changes: 17 additions & 6 deletions examples/end-to-end-examples/support_chatbot/app_demo.py
@@ -22,19 +22,19 @@ def parse_args() -> Namespace:
parser.add_argument(
'--max_length',
type=int,
-default=1000,
+default=6000,
required=False,
-help='The maximum size of context from LangChain')
+help='The maximum number of tokens for the model')
parser.add_argument(
'--chunk_size',
type=int,
-default=1000,
+default=800,
required=False,
help='The chunk size when splitting documents')
parser.add_argument(
'--chunk_overlap',
type=int,
-default=300,
+default=400,
required=False,
help='The overlap between chunks when splitting documents')
parser.add_argument(
@@ -55,6 +55,12 @@ def parse_args() -> Namespace:
default='https://github.com/mosaicml/composer,https://github.com/mosaicml/streaming,https://github.com/mosaicml/examples,https://github.com/mosaicml/diffusion,https://github.com/mosaicml/llm-foundry',
required=False,
help='The GitHub repository URLs to download')
+parser.add_argument(
+'--complex_chat',
+type=bool,
+default=False,
+required=False,
+help='Whether to use subquery chatting')

parsed = parser.parse_args()

@@ -70,7 +76,8 @@ def main(endpoint_url: str,
chunk_overlap: int,
retrieval_k: int,
model_k: int,
-repository_urls: list[str]) -> None:
+repository_urls: list[str],
+chat_version: bool) -> None:

retrieval_dir = os.path.join(ROOT_DIR, 'retrieval_data_demo')

@@ -106,7 +113,10 @@ def chat_wrapper(query: str) -> str:
Returns:
str: The response from chatbot"""
-return chatbot.chat(query)
+if chat_version:
+    return chatbot.sub_query_chat(query)
+else:
+    return chatbot.chat(query)

def gradio_chat():
"""Simple gradio application for querying the model"""
@@ -133,4 +143,5 @@ def gradio_chat():
retrieval_k = args.retrieval_k,
model_k = args.model_k,
repository_urls = args.repository_urls,
+chat_version = args.complex_chat
)
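A note on the new flag: argparse's type=bool converts via bool(str), so any non-empty value (including --complex_chat False) parses as True; only the default applies when the flag is omitted. A minimal sketch of a converter that parses booleans correctly (the str2bool helper is hypothetical, not part of this commit):

from argparse import ArgumentParser, ArgumentTypeError

def str2bool(value: str) -> bool:
    # bool('False') is True, so parse the common spellings explicitly
    if value.lower() in ('true', '1', 'yes'):
        return True
    if value.lower() in ('false', '0', 'no'):
        return False
    raise ArgumentTypeError(f'Expected a boolean, got {value!r}')

parser = ArgumentParser()
parser.add_argument('--complex_chat', type=str2bool, default=False,
                    help='Whether to use subquery chatting')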
33 changes: 16 additions & 17 deletions examples/end-to-end-examples/support_chatbot/app_slack.py
@@ -30,16 +30,17 @@ def parse_args() -> Namespace:
"""Parse commandline arguments."""
parser = ArgumentParser(description='Run a chatbot!')
parser.add_argument('--endpoint_url', type=str, default='https://models.hosted-on.mosaicml.hosting/mpt-30b-chat/v1/predict', required=False, help='The endpoint of our MosaicML LLM Model')
-parser.add_argument('--max_length', type=int, default=1000, required=False, help='The maximum size of context from LangChain')
-parser.add_argument('--chunk_size', type=int, default=1000, required=False, help='The chunk size when splitting documents')
-parser.add_argument('--chunk_overlap', type=int, default=300, required=False, help='The overlap between chunks when splitting documents')
+parser.add_argument('--max_length', type=int, default=1200, required=False, help='The maximum size of context from LangChain')
+parser.add_argument('--chunk_size', type=int, default=1200, required=False, help='The chunk size when splitting documents')
+parser.add_argument('--chunk_overlap', type=int, default=800, required=False, help='The overlap between chunks when splitting documents')
parser.add_argument('--retrieval_k', type=int, default=5, required=False, help='The number of chunks to retrieve as context from vector store')
parser.add_argument('--model_k', type=int, default=10, required=False, help='The number of outputs model should output')
parser.add_argument('--repository_urls', type=str, default='https://github.com/mosaicml/composer,https://github.com/mosaicml/streaming,https://github.com/mosaicml/examples,https://github.com/mosaicml/diffusion,https://github.com/mosaicml/llm-foundry', required=False, help='The GitHub repository URLs to download')
parser.add_argument('--data_collecting', type=bool, default=False, help='Whether to store successful threads')
parser.add_argument('--slack_token', type=str, help='Slack Token')
parser.add_argument('--slack_signing_secret', type=str, help='Slack Signing Secret')
parser.add_argument('--oci_data_storage', type=str, default='oci://mosaicml-internal-checkpoints/support-bot-demo/slack-data', help='Where successful threads will be stored')
+parser.add_argument('--complex_chat', type=bool, default=False, help='Whether to use subquery chatting')

parsed = parser.parse_args()
if parsed.repository_urls is not None:
@@ -103,15 +104,18 @@ def slack_events():

# Construct the message for the model
conversation = " ".join(conversation_msgs)
-if len(conversation) > 1000:
-    conversation = conversation[-1000:]
+if len(conversation) > 2000:
+    conversation = conversation[-2000:]
user_msg = f"Here is the conversation so far: {conversation} Here is the question: {question_msg}"

print(user_msg)

# Respond quickly to Slack
response = jsonify({'status': 'acknowledged'})
-chat_response = chatbot.chat(user_msg)
+if chat_version:
+    chat_response = chatbot.sub_query_chat(user_msg)
+else:
+    chat_response = chatbot.chat(user_msg)

# Post response in the same thread
post_args = {'channel': channel_id, 'text': chat_response, 'thread_ts': thread_ts}
Expand Down Expand Up @@ -245,14 +249,6 @@ def verify_slack_request(request):

return hmac.compare_digest(my_signature, signature)

-# @app.before_request
-# def before_slack_event_request():
-# """
-# Before processing a request, verify it's from Slack.
-# """
-# if not verify_slack_request(request):
-# return jsonify({'message': 'Unauthorized'}), 401

def main(endpoint_url: str,
max_length: int,
chunk_size: int,
@@ -263,7 +259,8 @@ def main(endpoint_url: str,
data_collecting: bool,
slack_token: str,
slack_signing_secret: str,
-oci_data_storage: str):
+oci_data_storage: str,
+complex_chat: bool):

if slack_token is None:
try:
@@ -277,10 +274,11 @@ def main(endpoint_url: str,
except KeyError:
raise ValueError('No slack signing secret provided. Please provide a slack signing secret or set the signing secret environment variable')

-global chatbot, client, oci_manager, read_slack, signing_secret
+global chatbot, client, oci_manager, read_slack, signing_secret, chat_version
oci_manager = OCIObjectStorageManager(oci_uri=oci_data_storage)
read_slack = data_collecting
signing_secret = slack_signing_secret
+chat_version = complex_chat

retrieval_dir = os.path.join(ROOT_DIR, 'retrieval_data_slack')

@@ -331,4 +329,5 @@ def main(endpoint_url: str,
data_collecting=args.data_collecting,
slack_token=args.slack_token,
slack_signing_secret=args.slack_signing_secret,
-oci_data_storage=args.oci_data_storage)
+oci_data_storage=args.oci_data_storage,
+complex_chat=args.complex_chat)
74 changes: 70 additions & 4 deletions examples/end-to-end-examples/support_chatbot/chatbot.py
@@ -65,6 +65,41 @@ def get_relevant_documents(self, query: str) -> list[Document]:
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant""")
+SUBQUERY_INTENT_TEMPLATE = ("""<|im_start|>system
+A conversation between a user and an LLM-based AI assistant about the codebase for MosaicML.
+Provide a helpful, short and simple answer to the question given the following context. If you do not know, just say "I
+do not know".<|im_end|>
+<|im_start|>context
+{context}<|im_end|>
+<|im_start|>user
+Why would the user ask the following question: {question}<|im_end|>
+<|im_start|>assistant""")
+PARTIAL_SUBQA_TEMPLATE = ("""<|im_start|>system
+A conversation between a user and an LLM-based AI assistant about the codebase for MosaicML.
+Given the context, the job of the assistant is to determine if the context is useful for answering the user's question.
+If so, the assistant will break the question into smaller questions that can likely be answered by a single section of
+the relevant context. If the context is not directly related to the user's question, the assistant will just break the
+question into simpler questions not related to the context that may be helpful for answering the question.<|im_end|>
+<|im_start|>context
+{{context}}<|im_end|>
+<|im_start|>user
+{{question}} {} Can this question be answered with the context given alone? If so, break the question down into at most five
+smaller questions that can likely be answered by a single section of the relevant documentation. If not, break the
+question down into at most five helpful questions.
+Please only respond with a list of smaller questions without any extra information.<|im_end|>
+<|im_start|>assistant""")
+PARTIAL_COMBINE_TEMPLATE = ("""<|im_start|>system
+A conversation between a user and an LLM-based AI assistant.
+Here are smaller questions regarding the user's question. If you don't know the answer to one of them, pretend
+that question doesn't exist:
+{}
+Provide a helpful and in-depth answer to the question given the following context.
+If you do not know, just say "I do not know".<|im_end|>
+<|im_start|>context
+{{context}}<|im_end|>
+<|im_start|>user
+{{question}}<|im_end|>
+<|im_start|>assistant""")
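The PARTIAL_* templates rely on str.format escaping: the single {} slot is filled by the first formatting pass (with the intent or sub-question text), while the doubled {{context}} and {{question}} collapse to literal {context} and {question} placeholders for the retrieval chain to fill later. A quick illustration:

partial = 'Context: {{context}} Question: {{question}} Hint: {}'
stage_one = partial.format('user is asking about checkpointing')
# stage_one == 'Context: {context} Question: {question} Hint: user is asking about checkpointing'
# A chain created from stage_one can still substitute {context} and {question}.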

EVAL_SIMPLE_DIR = os.path.join(ROOT_DIR, 'train_data/pipeline_data/composer_docstrings.jsonl')
EVAL_COMPLEX_DIR = os.path.join(ROOT_DIR, 'train_data/pipeline_data/complex_eval.jsonl')
@@ -121,6 +156,7 @@ def __init__(self,
self.saved_state = {'k': k, 'chunk_size': chunk_size, 'chunk_overlap': chunk_overlap, 'model_k': model.model_kwargs['top_k'],
'endpoint_url': model.endpoint_url}
self.chat_chain = None
+self.intent_chain = None
self.slack_path = slack_path
self.vector_store = None

@@ -358,8 +394,8 @@ def reload_chat_state(self) -> None:
self.model.endpoint_url = self.saved_state['endpoint_url']

def evaluate_simple(self,
data_path: str,
answer_question_string_template: str) -> str:
"""Evaluate the chatbot on simple retrieval dataset given a data_path and a chain
Args:
@@ -394,8 +430,8 @@ def evaluate_simple(self,
return f'Given Score: {(exact_match + 0.5*close_match)/ total} with {exact_match} exact matches and {close_match} close matches out of {total} questions.'

def evaluate_complex(self,
data_path: str,
answer_question_string_template: str) -> str:
"""Evaluate the chatbot on complex eval dataset given a data_path and a chain
Args:
@@ -419,6 +455,36 @@ def evaluate_complex(self,
save += f'Question:\n{question}\nAnswer:\n{continuation}\nResponse:\n{answer}\n\n'
return save

+def sub_query_chat(self,
+                   query: str) -> str:
+    """Chat with the chatbot using subqueries: first infer the intent behind the
+    query, then break the query into smaller sub-questions, and finally answer
+    the original query with the sub-questions folded into the prompt.
+
+    Args:
+        query (str): The query from the user
+
+    Returns:
+        str: The response from the chatbot, with intent, sub-questions and sources appended"""
+    if not self.intent_chain:
+        self.intent_chain = self.create_chain(SUBQUERY_INTENT_TEMPLATE)
+    intent_response = self.intent_chain(query)
+    intent_answer = self.clean_response(intent_response['result'].lstrip('\n'))
+
+    SUBQUERY_SUBQA_TEMPLATE = PARTIAL_SUBQA_TEMPLATE.format(intent_answer)
+    subQA_chain = self.create_chain(SUBQUERY_SUBQA_TEMPLATE)
+    subQA_response = subQA_chain(query)
+    subQA_answer = self.clean_response(subQA_response['result'].lstrip('\n'))
+
+    SUBQUERY_COMBINE_TEMPLATE = PARTIAL_COMBINE_TEMPLATE.format(subQA_answer)
+    combine_chain = self.create_chain(SUBQUERY_COMBINE_TEMPLATE)
+    combine_response = combine_chain(query)
+    combine_answer = self.clean_response(combine_response['result'].lstrip('\n'))
+    sources = ''
+    slack_deduplicate = True
+    for d in combine_response['source_documents']:
+        if d.metadata["score"] < 0.6:
+            # Report all Slack-sourced documents as a single 'slack_data' entry
+            if d.metadata["file_name"].startswith('message_from_slack'):
+                if slack_deduplicate:
+                    sources = sources + 'slack_data\n'
+                    slack_deduplicate = False
+            else:
+                sources = sources + f'{d.metadata["file_name"].replace("{slash}", "/")}\n'
+    if not sources:
+        return f'Answer: \n{combine_answer}\n\nIntent: \n{intent_answer}\n\nSub-questions: \n{subQA_answer}'
+    else:
+        return f'Answer: \n{combine_answer}\n\nIntent: \n{intent_answer}\n\nSub-questions: \n{subQA_answer}\nSources: \n{sources}'

def chat(self,
query: str) -> str:
"""Chat with the chatbot given a query
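Putting the pieces together, sub_query_chat runs three chains in sequence (intent, sub-questions, combined answer) and then assembles a deduplicated source list. A hedged usage sketch, assuming a ChatBot instance constructed as elsewhere in this example:

# chatbot is a ChatBot built with the same vector store and model as chat()
response = chatbot.sub_query_chat('How do I resume training from a Composer checkpoint?')
print(response)
# Prints an 'Answer:' section followed by 'Intent:', 'Sub-questions:', and,
# when retrieved documents score below the 0.6 threshold, a 'Sources:' list.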
@@ -22,8 +22,8 @@ integrations:
command: |
cd llm-foundry/scripts/inference
python convert_composer_to_hf.py \
-      --composer_path oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer-codebase/latest-rank0.pt.symlink \
-      --hf_output_path oci://mosaicml-internal-checkpoints/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer-codebase-hf/ \
+      --composer_path oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer_chatv2/latest-rank0.pt.symlink \
+      --hf_output_path oci://mosaicml-internal-checkpoints/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer_chatv2-hf/ \
--output_precision bf16 \
image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04 # Use the Docker image provided by MosaicML
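Once convert_composer_to_hf.py has written the HuggingFace-format checkpoint, it can be loaded with transformers; MPT checkpoints ship custom modeling code, so trust_remote_code=True is required. A sketch with an illustrative local path (not a path from this commit):

from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative local copy of the converted checkpoint from the OCI bucket above
ckpt = './mpt-30b-chat_composer_chatv2-hf'
model = AutoModelForCausalLM.from_pretrained(ckpt, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(ckpt)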
@@ -1,7 +1,7 @@
-name: mpt-30b-chat_composer_chatv2
+name: mpt-30b-PyPi_composer_chatv2

scheduling:
-resumable: false
+resumable: true
priority: low

compute:
@@ -43,13 +43,13 @@ image: mosaicml/llm-foundry:2.0.1_cu118-latest # Use the Docker image provided b
# See LLM-foundry llmfoundry/scripts/train.py to see how the parameters are used in code
parameters:
# Path to load the weights from the previous step
-load_path: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer-codebase/latest-rank0.pt.symlink
+load_path: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b_PyPi_composer/latest-rank0.pt.symlink
load_weights_only: false # Only load the weights for finetuning, discarding any other state from previous training

# Checkpoint to local filesystem or remote object store
save_interval: 2ep # How frequently to save checkpoints
save_num_checkpoints_to_keep: 1 # Important, this cleans up checkpoints saved to DISK
-save_folder: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer_chatv2/
+save_folder: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-PyPi_composer_chatv2/
save_weights_only: false
dist_timeout: 600000 # Set large dist_timeout to allow for checkpoint uploading on a slow connection

@@ -69,9 +69,8 @@ parameters:
model:
name: hf_causal_lm
pretrained: false
-pretrained_model_name_or_path: mosaicml/mpt-30b-chat
+pretrained_model_name_or_path: mosaicml/mpt-30b
init_device: mixed
-use_auth_token: true
config_overrides:
max_seq_len: ${max_seq_len}
attn_config:
@@ -80,7 +79,7 @@

# Tokenizer
tokenizer:
-name: mosaicml/mpt-30b-chat
+name: mosaicml/mpt-30b
kwargs:
model_max_length: ${max_seq_len}

@@ -143,7 +142,7 @@ parameters:
clipping_threshold: 1.0

# Run configuration
-max_duration: 4ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
+max_duration: 8ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
eval_interval: 1ep # How frequently to evaluate the model
eval_first: true # Whether to evaluate the model before training
eval_subset_num_batches: -1 # How many batches to evaluate on. -1 means evaluate on the entire dataset
Binary file not shown.
