From 6b42a478bc674e41b9e5cd4911ff54e5ce91364a Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Tue, 13 Aug 2024 12:55:46 +0530 Subject: [PATCH 01/28] conversions flow config added --- .../utilities/helpers/config/config_helper.py | 14 ++++++++++++++ .../helpers/config/conversation_flow.py | 2 +- .../batch/utilities/helpers/config/default.json | 3 ++- code/backend/pages/04_Configuration.py | 16 ++++++++++++++++ code/create_app.py | 2 +- 5 files changed, 34 insertions(+), 3 deletions(-) diff --git a/code/backend/batch/utilities/helpers/config/config_helper.py b/code/backend/batch/utilities/helpers/config/config_helper.py index 677bdf5d4..1e33e2794 100644 --- a/code/backend/batch/utilities/helpers/config/config_helper.py +++ b/code/backend/batch/utilities/helpers/config/config_helper.py @@ -12,6 +12,7 @@ from ...orchestrator import OrchestrationSettings from ..env_helper import EnvHelper from .assistant_strategy import AssistantStrategy +from .conversation_flow import ConversationFlow CONFIG_CONTAINER_NAME = "config" CONFIG_FILE_NAME = "active.json" @@ -90,6 +91,9 @@ def get_available_orchestration_strategies(self): def get_available_ai_assistant_types(self): return [c.value for c in AssistantStrategy] + def get_available_conversational_flows(self): + return [c.value for c in ConversationFlow] + # TODO: Change to AnsweringChain or something, Prompts is not a good name class Prompts: @@ -102,6 +106,7 @@ def __init__(self, prompts: dict): self.enable_post_answering_prompt = prompts["enable_post_answering_prompt"] self.enable_content_safety = prompts["enable_content_safety"] self.ai_assistant_type = prompts["ai_assistant_type"] + self.conversational_flow = prompts["conversational_flow"] class Example: @@ -173,6 +178,9 @@ def _set_new_config_properties(config: dict, default_config: dict): "integrated_vectorization_config" ] + if config["prompts"].get("conversational_flow") is None: + config["prompts"]["conversational_flow"] = default_config["prompts"]["conversational_flow"] + @staticmethod @functools.cache def get_active_config_or_default(): @@ -199,6 +207,12 @@ def get_default_assistant_prompt(): config = ConfigHelper.get_default_config() return config["prompts"]["answering_user_prompt"] + @staticmethod + @functools.cache + def get_default_conversational_flow_prompt(): + config = ConfigHelper.get_default_config() + return config["prompts"]["conversational_flow"] + @staticmethod def save_config_as_active(config): ConfigHelper.validate_config(config) diff --git a/code/backend/batch/utilities/helpers/config/conversation_flow.py b/code/backend/batch/utilities/helpers/config/conversation_flow.py index 29ccbf79f..cfab80aed 100644 --- a/code/backend/batch/utilities/helpers/config/conversation_flow.py +++ b/code/backend/batch/utilities/helpers/config/conversation_flow.py @@ -3,4 +3,4 @@ class ConversationFlow(Enum): CUSTOM = "custom" - BYOD = "byod" + BYOD = "bring your own data" diff --git a/code/backend/batch/utilities/helpers/config/default.json b/code/backend/batch/utilities/helpers/config/default.json index cf3999ab7..9bfa9c813 100644 --- a/code/backend/batch/utilities/helpers/config/default.json +++ b/code/backend/batch/utilities/helpers/config/default.json @@ -8,7 +8,8 @@ "use_on_your_data_format": true, "enable_post_answering_prompt": false, "ai_assistant_type": "default", - "enable_content_safety": true + "enable_content_safety": true, + "conversational_flow": "custom" }, "example": { "documents": "{\n \"retrieved_documents\": [\n {\n \"[doc1]\": {\n \"content\": \"Dual Transformer Encoder (DTE) DTE (https://dev.azure.com/TScience/TSciencePublic/_wiki/wikis/TSciencePublic.wiki/82/Dual-Transformer-Encoder) DTE is a general pair-oriented sentence representation learning framework based on transformers. It provides training, inference and evaluation for sentence similarity models. Model Details DTE can be used to train a model for sentence similarity with the following features: - Build upon existing transformer-based text representations (e.g.TNLR, BERT, RoBERTa, BAG-NLR) - Apply smoothness inducing technology to improve the representation robustness - SMART (https://arxiv.org/abs/1911.03437) SMART - Apply NCE (Noise Contrastive Estimation) based similarity learning to speed up training of 100M pairs We use pretrained DTE model\"\n }\n },\n {\n \"[doc2]\": {\n \"content\": \"trained on internal data. You can find more details here - Models.md (https://dev.azure.com/TScience/_git/TSciencePublic?path=%2FDualTransformerEncoder%2FMODELS.md&version=GBmaster&_a=preview) Models.md DTE-pretrained for In-context Learning Research suggests that finetuned transformers can be used to retrieve semantically similar exemplars for e.g. KATE (https://arxiv.org/pdf/2101.06804.pdf) KATE . They show that finetuned models esp. tuned on related tasks give the maximum boost to GPT-3 in-context performance. DTE have lot of pretrained models that are trained on intent classification tasks. We can use these model embedding to find natural language utterances which are similar to our test utterances at test time. The steps are: 1. Embed\"\n }\n },\n {\n \"[doc3]\": {\n \"content\": \"train and test utterances using DTE model 2. For each test embedding, find K-nearest neighbors. 3. Prefix the prompt with nearest embeddings. The following diagram from the above paper (https://arxiv.org/pdf/2101.06804.pdf) the above paper visualizes this process: DTE-Finetuned This is an extension of DTE-pretrained method where we further finetune the embedding models for prompt crafting task. In summary, we sample random prompts from our training data and use them for GPT-3 inference for the another part of training data. Some prompts work better and lead to right results whereas other prompts lead\"\n }\n },\n {\n \"[doc4]\": {\n \"content\": \"to wrong completions. We finetune the model on the downstream task of whether a prompt is good or not based on whether it leads to right or wrong completion. This approach is similar to this paper: Learning To Retrieve Prompts for In-Context Learning (https://arxiv.org/pdf/2112.08633.pdf) this paper: Learning To Retrieve Prompts for In-Context Learning . This method is very general but it may require a lot of data to actually finetune a model to learn how to retrieve examples suitable for the downstream inference model like GPT-3.\"\n }\n }\n ]\n}", diff --git a/code/backend/pages/04_Configuration.py b/code/backend/pages/04_Configuration.py index c5f682953..9199ac18c 100644 --- a/code/backend/pages/04_Configuration.py +++ b/code/backend/pages/04_Configuration.py @@ -8,6 +8,7 @@ from batch.utilities.helpers.config.config_helper import ConfigHelper from azure.core.exceptions import ResourceNotFoundError from batch.utilities.helpers.config.assistant_strategy import AssistantStrategy +from batch.utilities.helpers.config.conversation_flow import ConversationFlow sys.path.append(os.path.join(os.path.dirname(__file__), "..")) env_helper: EnvHelper = EnvHelper() @@ -66,6 +67,8 @@ def load_css(file_path): st.session_state["orchestrator_strategy"] = config.orchestrator.strategy.value if "ai_assistant_type" not in st.session_state: st.session_state["ai_assistant_type"] = config.prompts.ai_assistant_type +if "conversational_flow" not in st.session_state: + st.session_state["conversational_flow"] = config.prompts.conversational_flow if env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: if "max_page_length" not in st.session_state: @@ -164,6 +167,17 @@ def validate_documents(): try: + conversational_flow_help = "Whether to use the custom conversational flow or byod conversational flow. Refer to the Conversational flow options README for more details." + with st.expander("Conversational flow configuration", expanded=True): + cols = st.columns([2, 4]) + with cols[0]: + conv_flow = st.selectbox( + "Conversational flow", + key="conversational_flow", + options=config.get_available_conversational_flows(), + help=conversational_flow_help, + ) + with st.expander("Orchestrator configuration", expanded=True): cols = st.columns([2, 4]) with cols[0]: @@ -171,6 +185,7 @@ def validate_documents(): "Orchestrator strategy", key="orchestrator_strategy", options=config.get_available_orchestration_strategies(), + disabled= True if st.session_state["conversational_flow"] == ConversationFlow.BYOD.value else False ) # # # condense_question_prompt_help = "This prompt is used to convert the user's input to a standalone question, using the context of the chat history." @@ -378,6 +393,7 @@ def validate_documents(): ], "enable_content_safety": st.session_state["enable_content_safety"], "ai_assistant_type": st.session_state["ai_assistant_type"], + "conversational_flow": st.session_state["conversational_flow"], }, "messages": { "post_answering_filter": st.session_state[ diff --git a/code/create_app.py b/code/create_app.py index c8eba08b4..36a109d0a 100644 --- a/code/create_app.py +++ b/code/create_app.py @@ -405,7 +405,7 @@ async def conversation_custom(): @app.route("/api/conversation", methods=["POST"]) async def conversation(): - conversation_flow = env_helper.CONVERSATION_FLOW + conversation_flow = ConfigHelper.get_default_conversational_flow_prompt() if conversation_flow == ConversationFlow.CUSTOM.value: return await conversation_custom() elif conversation_flow == ConversationFlow.BYOD.value: From 59569727e9ad20fe78980e891918e502c1d91b65 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Tue, 13 Aug 2024 13:57:06 +0530 Subject: [PATCH 02/28] fixed config issue --- code/backend/batch/utilities/helpers/config/config_helper.py | 5 ----- code/create_app.py | 3 ++- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/code/backend/batch/utilities/helpers/config/config_helper.py b/code/backend/batch/utilities/helpers/config/config_helper.py index 1e33e2794..f88c6f4d3 100644 --- a/code/backend/batch/utilities/helpers/config/config_helper.py +++ b/code/backend/batch/utilities/helpers/config/config_helper.py @@ -207,11 +207,6 @@ def get_default_assistant_prompt(): config = ConfigHelper.get_default_config() return config["prompts"]["answering_user_prompt"] - @staticmethod - @functools.cache - def get_default_conversational_flow_prompt(): - config = ConfigHelper.get_default_config() - return config["prompts"]["conversational_flow"] @staticmethod def save_config_as_active(config): diff --git a/code/create_app.py b/code/create_app.py index 36a109d0a..0e6b8f06d 100644 --- a/code/create_app.py +++ b/code/create_app.py @@ -405,7 +405,8 @@ async def conversation_custom(): @app.route("/api/conversation", methods=["POST"]) async def conversation(): - conversation_flow = ConfigHelper.get_default_conversational_flow_prompt() + result = ConfigHelper.get_active_config_or_default() + conversation_flow = result.prompts.conversational_flow if conversation_flow == ConversationFlow.CUSTOM.value: return await conversation_custom() elif conversation_flow == ConversationFlow.BYOD.value: From 5fcf930c2bf8da401328371f50b950108ba1a7c6 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Tue, 13 Aug 2024 17:00:54 +0530 Subject: [PATCH 03/28] enum updated --- .../backend/batch/utilities/helpers/config/conversation_flow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/backend/batch/utilities/helpers/config/conversation_flow.py b/code/backend/batch/utilities/helpers/config/conversation_flow.py index cfab80aed..29ccbf79f 100644 --- a/code/backend/batch/utilities/helpers/config/conversation_flow.py +++ b/code/backend/batch/utilities/helpers/config/conversation_flow.py @@ -3,4 +3,4 @@ class ConversationFlow(Enum): CUSTOM = "custom" - BYOD = "bring your own data" + BYOD = "byod" From 0667477ed48015d6bd94ec35b6ea9ada5bb1ccbb Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Fri, 16 Aug 2024 16:22:10 +0530 Subject: [PATCH 04/28] updated byod chat refferences --- code/backend/pages/04_Configuration.py | 7 ++++- code/create_app.py | 38 +++++++++++++------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/code/backend/pages/04_Configuration.py b/code/backend/pages/04_Configuration.py index 9199ac18c..4b9cb2fde 100644 --- a/code/backend/pages/04_Configuration.py +++ b/code/backend/pages/04_Configuration.py @@ -185,7 +185,12 @@ def validate_documents(): "Orchestrator strategy", key="orchestrator_strategy", options=config.get_available_orchestration_strategies(), - disabled= True if st.session_state["conversational_flow"] == ConversationFlow.BYOD.value else False + disabled=( + True + if st.session_state["conversational_flow"] + == ConversationFlow.BYOD.value + else False + ), ) # # # condense_question_prompt_help = "This prompt is used to convert the user's input to a standalone question, using the context of the chat history." diff --git a/code/create_app.py b/code/create_app.py index 0e6b8f06d..59822f8e4 100644 --- a/code/create_app.py +++ b/code/create_app.py @@ -19,6 +19,8 @@ from backend.batch.utilities.helpers.config.conversation_flow import ConversationFlow from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient from azure.identity import DefaultAzureCredential +from backend.batch.utilities.tools.question_answer_tool import QuestionAnswerTool +from backend.batch.utilities.parser.output_parser_tool import OutputParserTool ERROR_429_MESSAGE = "We're currently experiencing a high number of requests for the service you're trying to access. Please wait a moment and try again." ERROR_GENERIC_MESSAGE = "An error occurred. Please try again. If the problem persists, please contact the site administrator." @@ -166,30 +168,27 @@ def conversation_with_data(conversation: Request, env_helper: EnvHelper): ) if not env_helper.SHOULD_STREAM: + user_messages = list( + filter( + lambda x: x["role"] in ("user", "assistant"), + conversation.json["messages"][0:-1], + ) + ) + answer = QuestionAnswerTool().answer_question( + messages[-1]["content"], user_messages + ) + # Format the output for the UI + messages = OutputParserTool().parse( + question=answer.question, + answer=answer.answer, + source_documents=answer.source_documents, + ) response_obj = { "id": response.id, "model": response.model, "created": response.created, "object": response.object, - "choices": [ - { - "messages": [ - { - "content": json.dumps( - response.choices[0].message.model_extra["context"], - ensure_ascii=False, - ), - "end_turn": False, - "role": "tool", - }, - { - "end_turn": True, - "content": response.choices[0].message.content, - "role": "assistant", - }, - ] - } - ], + "choices": [{"messages": messages}], } return response_obj @@ -405,6 +404,7 @@ async def conversation_custom(): @app.route("/api/conversation", methods=["POST"]) async def conversation(): + ConfigHelper.get_active_config_or_default.cache_clear() result = ConfigHelper.get_active_config_or_default() conversation_flow = result.prompts.conversational_flow if conversation_flow == ConversationFlow.CUSTOM.value: From 461910ec2cee8c48582bb5d5f7c674b7e5d08f1b Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Tue, 27 Aug 2024 18:28:09 +0530 Subject: [PATCH 05/28] byod streaming --- code/create_app.py | 58 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/code/create_app.py b/code/create_app.py index bd50aa0df..b34376f3c 100644 --- a/code/create_app.py +++ b/code/create_app.py @@ -8,6 +8,7 @@ import mimetypes from os import path import sys +import re import requests from openai import AzureOpenAI, Stream, APIStatusError from openai.types.chat import ChatCompletionChunk @@ -21,13 +22,53 @@ from azure.identity import DefaultAzureCredential from backend.batch.utilities.tools.question_answer_tool import QuestionAnswerTool from backend.batch.utilities.parser.output_parser_tool import OutputParserTool +from backend.batch.utilities.search.search import Search ERROR_429_MESSAGE = "We're currently experiencing a high number of requests for the service you're trying to access. Please wait a moment and try again." ERROR_GENERIC_MESSAGE = "An error occurred. Please try again. If the problem persists, please contact the site administrator." logger = logging.getLogger(__name__) -def stream_with_data(response: Stream[ChatCompletionChunk]): +def parse_byod_answer(source_documents, doc_ids, citations): + # create return message object + + messages = [ + { + "role": "tool", + "content": {"citations": [*citations]}, + "end_turn": False, + } + ] + for i in doc_ids: + idx = i - 1 + + if idx >= len(source_documents): + logger.warning(f"Source document {i} not provided, skipping doc") + continue + + doc = source_documents[idx] + # Then update the citation object in the response, it needs to have filepath and chunk_id to render in the UI as a file + messages[0]["content"]["citations"].append( + { + "content": doc.content, + "id": doc.id, + "chunk_id": ( + re.findall(r"\d+", doc.chunk_id)[-1] + if doc.chunk_id is not None + else doc.chunk + ), + "title": doc.title, + "filepath": doc.get_filename(include_path=True), + "url": doc.get_markdown_url(), + } + ) + citations = messages[0]["content"]["citations"] + + messages[0]["content"] = json.dumps(messages[0]["content"]) + return messages, citations + + +def stream_with_data(response: Stream[ChatCompletionChunk], question: str): """This function streams the response from Azure OpenAI with data.""" response_obj = { "id": "", @@ -51,7 +92,9 @@ def stream_with_data(response: Stream[ChatCompletionChunk]): } ], } - + search_handler = Search.get_search_handler(env_helper=EnvHelper()) + source_documents = Search.get_source_documents(search_handler, question) + citations = [] for line in response: choice = line.choices[0] @@ -75,7 +118,13 @@ def stream_with_data(response: Stream[ChatCompletionChunk]): ) else: response_obj["choices"][0]["messages"][1]["content"] += delta.content - + docs = re.findall(r"\[doc(\d+)\]", delta.content.replace(" ", " ")) + if docs: + doc_ids = [int(i) for i in docs] + ref_content, citations = parse_byod_answer( + source_documents, doc_ids, citations + ) + response_obj["choices"][0]["messages"][0] = ref_content[0] yield json.dumps(response_obj, ensure_ascii=False) + "\n" @@ -194,7 +243,7 @@ def conversation_with_data(conversation: Request, env_helper: EnvHelper): return response_obj return Response( - stream_with_data(response), + stream_with_data(response, messages[-1]["content"]), mimetype="application/json-lines", ) @@ -404,7 +453,6 @@ async def conversation_custom(): @app.route("/api/conversation", methods=["POST"]) async def conversation(): - ConfigHelper.get_active_config_or_default.cache_clear() result = ConfigHelper.get_active_config_or_default() conversation_flow = result.prompts.conversational_flow if conversation_flow == ConversationFlow.CUSTOM.value: From 5ccf91a2cb8616d74bc2789b3e25a140b6b02e18 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Thu, 29 Aug 2024 12:34:41 +0530 Subject: [PATCH 06/28] byod stream data issue fix --- code/create_app.py | 85 +++++++++---------- .../src/components/Answer/AnswerParser.tsx | 2 +- 2 files changed, 39 insertions(+), 48 deletions(-) diff --git a/code/create_app.py b/code/create_app.py index b34376f3c..9b9b99ac2 100644 --- a/code/create_app.py +++ b/code/create_app.py @@ -14,6 +14,7 @@ from openai.types.chat import ChatCompletionChunk from flask import Flask, Response, request, Request, jsonify from dotenv import load_dotenv +from urllib.parse import quote from backend.batch.utilities.helpers.env_helper import EnvHelper from backend.batch.utilities.helpers.orchestrator_helper import Orchestrator from backend.batch.utilities.helpers.config.config_helper import ConfigHelper @@ -22,53 +23,51 @@ from azure.identity import DefaultAzureCredential from backend.batch.utilities.tools.question_answer_tool import QuestionAnswerTool from backend.batch.utilities.parser.output_parser_tool import OutputParserTool -from backend.batch.utilities.search.search import Search +from backend.batch.utilities.helpers.azure_blob_storage_client import ( + AzureBlobStorageClient, +) ERROR_429_MESSAGE = "We're currently experiencing a high number of requests for the service you're trying to access. Please wait a moment and try again." ERROR_GENERIC_MESSAGE = "An error occurred. Please try again. If the problem persists, please contact the site administrator." logger = logging.getLogger(__name__) -def parse_byod_answer(source_documents, doc_ids, citations): - # create return message object +def get_markdown_url(source, title): + """Get Markdown URL for a citation""" - messages = [ - { - "role": "tool", - "content": {"citations": [*citations]}, - "end_turn": False, - } - ] - for i in doc_ids: - idx = i - 1 + url = quote(source, safe=":/") + if "_SAS_TOKEN_PLACEHOLDER_" in url: + blob_client = AzureBlobStorageClient() + container_sas = blob_client.get_container_sas() + url = url.replace("_SAS_TOKEN_PLACEHOLDER_", container_sas) + return f"[{title}]({url})" - if idx >= len(source_documents): - logger.warning(f"Source document {i} not provided, skipping doc") - continue - doc = source_documents[idx] - # Then update the citation object in the response, it needs to have filepath and chunk_id to render in the UI as a file - messages[0]["content"]["citations"].append( +def get_citations(citation_list): + """Returns Formated Citations""" + + citations_dict = {"citations": []} + for citation in citation_list.get("citations"): + metadata = json.loads(citation["url"]) + title = citation["title"] + citations_dict["citations"].append( { - "content": doc.content, - "id": doc.id, + "content": citation["content"], + "id": metadata["id"], "chunk_id": ( - re.findall(r"\d+", doc.chunk_id)[-1] - if doc.chunk_id is not None - else doc.chunk + re.findall(r"\d+", metadata["chunk_id"])[-1] + if metadata["chunk_id"] is not None + else metadata["chunk"] ), - "title": doc.title, - "filepath": doc.get_filename(include_path=True), - "url": doc.get_markdown_url(), + "title": title, + "filepath": title.split("/")[-1], + "url": get_markdown_url(metadata["source"], title), } ) - citations = messages[0]["content"]["citations"] + return citations_dict - messages[0]["content"] = json.dumps(messages[0]["content"]) - return messages, citations - -def stream_with_data(response: Stream[ChatCompletionChunk], question: str): +def stream_with_data(response: Stream[ChatCompletionChunk]): """This function streams the response from Azure OpenAI with data.""" response_obj = { "id": "", @@ -92,9 +91,7 @@ def stream_with_data(response: Stream[ChatCompletionChunk], question: str): } ], } - search_handler = Search.get_search_handler(env_helper=EnvHelper()) - source_documents = Search.get_source_documents(search_handler, question) - citations = [] + for line in response: choice = line.choices[0] @@ -112,19 +109,14 @@ def stream_with_data(response: Stream[ChatCompletionChunk], question: str): role = delta.role if role == "assistant": + citations = get_citations(delta.model_extra["context"]) response_obj["choices"][0]["messages"][0]["content"] = json.dumps( - delta.model_extra["context"], + citations, ensure_ascii=False, ) else: response_obj["choices"][0]["messages"][1]["content"] += delta.content - docs = re.findall(r"\[doc(\d+)\]", delta.content.replace(" ", " ")) - if docs: - doc_ids = [int(i) for i in docs] - ref_content, citations = parse_byod_answer( - source_documents, doc_ids, citations - ) - response_obj["choices"][0]["messages"][0] = ref_content[0] + yield json.dumps(response_obj, ensure_ascii=False) + "\n" @@ -186,7 +178,8 @@ def conversation_with_data(conversation: Request, env_helper: EnvHelper): env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMN ], "title_field": env_helper.AZURE_SEARCH_TITLE_COLUMN or None, - "url_field": env_helper.AZURE_SEARCH_URL_COLUMN or None, + "url_field": env_helper.AZURE_SEARCH_FIELDS_METADATA + or None, "filepath_field": ( env_helper.AZURE_SEARCH_FILENAME_COLUMN or None ), @@ -242,10 +235,7 @@ def conversation_with_data(conversation: Request, env_helper: EnvHelper): return response_obj - return Response( - stream_with_data(response, messages[-1]["content"]), - mimetype="application/json-lines", - ) + return Response(stream_with_data(response), mimetype="application/json-lines") def stream_without_data(response: Stream[ChatCompletionChunk]): @@ -453,6 +443,7 @@ async def conversation_custom(): @app.route("/api/conversation", methods=["POST"]) async def conversation(): + ConfigHelper.get_active_config_or_default.cache_clear() result = ConfigHelper.get_active_config_or_default() conversation_flow = result.prompts.conversational_flow if conversation_flow == ConversationFlow.CUSTOM.value: diff --git a/code/frontend/src/components/Answer/AnswerParser.tsx b/code/frontend/src/components/Answer/AnswerParser.tsx index 4238ab8eb..955214880 100644 --- a/code/frontend/src/components/Answer/AnswerParser.tsx +++ b/code/frontend/src/components/Answer/AnswerParser.tsx @@ -11,7 +11,7 @@ let filteredCitations = [] as Citation[]; // Define a function to check if a citation with the same Chunk_Id already exists in filteredCitations const isDuplicate = (citation: Citation,citationIndex:string) => { - return filteredCitations.some((c) => c.chunk_id === citation.chunk_id) && !filteredCitations.find((c) => c.id === citationIndex) ; + return filteredCitations.some((c) => c.chunk_id === citation.chunk_id) && filteredCitations.find((c) => c.id === citationIndex) ; }; export function parseAnswer(answer: AskResponse): ParsedAnswer { From 0ebc56e9d8db4089aabd955d833227bb0d3947d7 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Thu, 29 Aug 2024 18:50:36 +0530 Subject: [PATCH 07/28] fix duplicate byod stream refference --- code/create_app.py | 5 +++-- code/frontend/src/components/Answer/AnswerParser.tsx | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/code/create_app.py b/code/create_app.py index 9b9b99ac2..48c7f83fb 100644 --- a/code/create_app.py +++ b/code/create_app.py @@ -50,9 +50,10 @@ def get_citations(citation_list): for citation in citation_list.get("citations"): metadata = json.loads(citation["url"]) title = citation["title"] + url = get_markdown_url(metadata["source"], title) citations_dict["citations"].append( { - "content": citation["content"], + "content": url + "\n\n\n" + citation["content"], "id": metadata["id"], "chunk_id": ( re.findall(r"\d+", metadata["chunk_id"])[-1] @@ -61,7 +62,7 @@ def get_citations(citation_list): ), "title": title, "filepath": title.split("/")[-1], - "url": get_markdown_url(metadata["source"], title), + "url": url, } ) return citations_dict diff --git a/code/frontend/src/components/Answer/AnswerParser.tsx b/code/frontend/src/components/Answer/AnswerParser.tsx index 955214880..57dd791a0 100644 --- a/code/frontend/src/components/Answer/AnswerParser.tsx +++ b/code/frontend/src/components/Answer/AnswerParser.tsx @@ -11,7 +11,7 @@ let filteredCitations = [] as Citation[]; // Define a function to check if a citation with the same Chunk_Id already exists in filteredCitations const isDuplicate = (citation: Citation,citationIndex:string) => { - return filteredCitations.some((c) => c.chunk_id === citation.chunk_id) && filteredCitations.find((c) => c.id === citationIndex) ; + return filteredCitations.some((c) => c.chunk_id === citation.chunk_id) ; }; export function parseAnswer(answer: AskResponse): ParsedAnswer { From d556854dda5660e7bb6d728d39a6fe2a998027b6 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Mon, 2 Sep 2024 10:15:49 +0530 Subject: [PATCH 08/28] fix unit test cases --- .../backend_api/default/test_conversation.py | 10 +++++- .../with_byod/test_conversation_flow.py | 12 ++++++- .../test_azure_byod_without_data.py | 33 +++++++++++++++++-- .../utilities/helpers/test_config_helper.py | 3 +- 4 files changed, 53 insertions(+), 5 deletions(-) diff --git a/code/tests/functional/tests/backend_api/default/test_conversation.py b/code/tests/functional/tests/backend_api/default/test_conversation.py index 70d567e14..b86feda1c 100644 --- a/code/tests/functional/tests/backend_api/default/test_conversation.py +++ b/code/tests/functional/tests/backend_api/default/test_conversation.py @@ -2,6 +2,7 @@ import re import pytest from pytest_httpserver import HTTPServer +from unittest.mock import ANY, MagicMock, patch import requests from tests.request_matching import ( @@ -175,9 +176,16 @@ def test_post_makes_correct_calls_to_openai_embeddings_to_embed_question_to_sear ) +@patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" +) def test_post_makes_correct_calls_to_openai_embeddings_to_embed_question_to_store_in_conversation_log( - app_url: str, app_config: AppConfig, httpserver: HTTPServer + get_active_config_or_default_mock, + app_url: str, + app_config: AppConfig, + httpserver: HTTPServer, ): + get_active_config_or_default_mock.prompts.conversational_flow = "custom" # when requests.post(f"{app_url}{path}", json=body) diff --git a/code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py b/code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py index b9ea5e7f0..ef3b29c2c 100644 --- a/code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py +++ b/code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py @@ -1,6 +1,7 @@ import json import pytest from pytest_httpserver import HTTPServer +from unittest.mock import patch import requests from string import Template @@ -46,9 +47,18 @@ def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig): httpserver.check() +@patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" +) def test_azure_byod_responds_successfully_when_streaming( - app_url: str, app_config: AppConfig, httpserver: HTTPServer + app_url: str, + app_config: AppConfig, + httpserver: HTTPServer, + get_active_config_or_default_mock, ): + get_active_config_or_default_mock.return_value.prompts.return_value = { + "conversational_flow": "byod" + } # when response = requests.post(f"{app_url}{path}", json=body) diff --git a/code/tests/functional/tests/backend_api/without_data/test_azure_byod_without_data.py b/code/tests/functional/tests/backend_api/without_data/test_azure_byod_without_data.py index f453e8ec0..c903b8eb3 100644 --- a/code/tests/functional/tests/backend_api/without_data/test_azure_byod_without_data.py +++ b/code/tests/functional/tests/backend_api/without_data/test_azure_byod_without_data.py @@ -3,6 +3,7 @@ from pytest_httpserver import HTTPServer import requests from string import Template +from unittest.mock import patch, MagicMock from tests.request_matching import ( RequestMatcher, @@ -48,9 +49,28 @@ def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig): httpserver.check() +@pytest.fixture(autouse=True) +def env_helper_mock(): + with patch("backend.batch.utilities.helpers.env_helper.EnvHelper") as mock: + env_helper = mock.return_value + + yield env_helper + + +@patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" +) def test_azure_byod_responds_successfully_when_streaming( - app_url: str, app_config: AppConfig, httpserver: HTTPServer + get_active_config_or_default_mock, + app_url: str, + app_config: AppConfig, + env_helper_mock: MagicMock, ): + # given + env_helper_mock.AZURE_SEARCH_KEY = None + env_helper_mock.should_use_data.return_value = False + get_active_config_or_default_mock.return_value.prompts.conversational_flow = "byod" + # when response = requests.post(f"{app_url}{path}", json=body) @@ -80,9 +100,18 @@ def test_azure_byod_responds_successfully_when_streaming( } +@patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" +) def test_post_makes_correct_call_to_azure_openai( - app_url: str, app_config: AppConfig, httpserver: HTTPServer + get_active_config_or_default_mock, + app_url: str, + app_config: AppConfig, + httpserver: HTTPServer, ): + # given + get_active_config_or_default_mock.return_value.prompts.conversational_flow = "byod" + # when requests.post(f"{app_url}{path}", json=body) diff --git a/code/tests/utilities/helpers/test_config_helper.py b/code/tests/utilities/helpers/test_config_helper.py index 0c2c990dd..f4de97014 100644 --- a/code/tests/utilities/helpers/test_config_helper.py +++ b/code/tests/utilities/helpers/test_config_helper.py @@ -19,7 +19,8 @@ def config_dict(): "post_answering_prompt": "mock_post_answering_prompt", "enable_post_answering_prompt": False, "enable_content_safety": True, - "ai_assistant_type": "default" + "ai_assistant_type": "default", + "conversational_flow": "custom", }, "messages": { "post_answering_filter": "mock_post_answering_filter", From a7eaf4dd4adc0843009dcda632bd2bb2d02284f2 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Mon, 2 Sep 2024 10:26:41 +0530 Subject: [PATCH 09/28] app test cases updated --- code/tests/test_app.py | 133 +++++++++++++++++++++++++++++++++-------- 1 file changed, 109 insertions(+), 24 deletions(-) diff --git a/code/tests/test_app.py b/code/tests/test_app.py index 75deba2ce..51e9d4564 100644 --- a/code/tests/test_app.py +++ b/code/tests/test_app.py @@ -26,7 +26,7 @@ AZURE_SEARCH_CONTENT_VECTOR_COLUMN = "vector-column" AZURE_SEARCH_TITLE_COLUMN = "title" AZURE_SEARCH_FILENAME_COLUMN = "filename" -AZURE_SEARCH_URL_COLUMN = "url" +AZURE_SEARCH_URL_COLUMN = "metadata" AZURE_SEARCH_FILTER = "filter" AZURE_SEARCH_ENABLE_IN_DOMAIN = "true" AZURE_SEARCH_TOP_K = 5 @@ -110,7 +110,7 @@ def test_returns_speech_token_using_keys( "token": "speech-token", "region": AZURE_SPEECH_SERVICE_REGION, "languages": AZURE_SPEECH_RECOGNIZER_LANGUAGES, - "key": "mock-speech-key" + "key": "mock-speech-key", } requests.post.assert_called_once_with( @@ -154,7 +154,7 @@ def test_returns_speech_token_using_rbac( "token": "speech-token", "region": AZURE_SPEECH_SERVICE_REGION, "languages": AZURE_SPEECH_RECOGNIZER_LANGUAGES, - "key": "mock-key1" + "key": "mock-key1", } requests.post.assert_called_once_with( @@ -214,6 +214,7 @@ class TestConversationCustom: def setup_method(self): """Set up the test data.""" self.orchestrator_config = {"strategy": "langchain"} + self.conversastion_flow = {"conversation_flow": "custom"} self.messages = [ { "content": '{"citations": [], "intent": "A question?"}', @@ -245,6 +246,9 @@ def test_conversation_custom_returns_correct_response( ): """Test that the custom conversation endpoint returns the correct response.""" # given + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "custom" + ) get_active_config_or_default_mock.return_value.orchestrator.return_value = ( self.orchestrator_config ) @@ -397,7 +401,6 @@ def test_conversation_custom_allows_multiple_messages_from_user( self, get_orchestrator_config_mock, get_message_orchestrator_mock, - env_helper_mock, client, ): """This can happen if there was an error getting a response from the assistant for the previous user message.""" @@ -438,11 +441,18 @@ def test_conversation_custom_allows_multiple_messages_from_user( orchestrator=self.orchestrator_config, ) + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_returns_error_response_on_incorrect_conversation_flow_input( - self, env_helper_mock, client + self, + get_active_config_or_default_mock, + client, ): # given - env_helper_mock.CONVERSATION_FLOW = "bob" + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "bob" + ) # when response = client.post( @@ -487,6 +497,7 @@ def setup_method(self): { "content": "content", "title": "title", + "url": '{"id": "doc_id", "source": "https://strgwchoykpenmmu.blob.core.windows.net/documents/MSFT_FY23Q4_10K.docx_SAS_TOKEN_PLACEHOLDER_", "title": "/documents/MSFT_FY23Q4_10K.docx", "chunk": 46, "chunk_id": null}', } ], "intent": "intent", @@ -513,6 +524,7 @@ def setup_method(self): { "content": "content", "title": "title", + "url": '{"id": "doc_id", "source": "https://strgwchoykpenmmu.blob.core.windows.net/documents/MSFT_FY23Q4_10K.docx_SAS_TOKEN_PLACEHOLDER_", "title": "/documents/MSFT_FY23Q4_10K.docx", "chunk": 46, "chunk_id": null}', } ], "intent": "intent", @@ -557,10 +569,13 @@ def setup_method(self): ] @patch("create_app.AzureOpenAI") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_azure_byod_returns_correct_response_when_streaming_with_data_keys( self, + get_active_config_or_default_mock, azure_openai_mock: MagicMock, - env_helper_mock: MagicMock, client: FlaskClient, ): """Test that the Azure BYOD conversation endpoint returns the correct response.""" @@ -570,7 +585,9 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da self.mock_streamed_response ) - env_helper_mock.CONVERSATION_FLOW = ConversationFlow.BYOD.value + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "byod" + ) # when response = client.post( @@ -641,8 +658,12 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da ) @patch("create_app.AzureOpenAI") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_azure_byod_returns_correct_response_when_streaming_with_data_rbac( self, + get_active_config_or_default_mock, azure_openai_mock: MagicMock, env_helper_mock: MagicMock, client: FlaskClient, @@ -650,7 +671,9 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da """Test that the Azure BYOD conversation endpoint returns the correct response.""" # given env_helper_mock.is_auth_type_keys.return_value = False - env_helper_mock.CONVERSATION_FLOW = ConversationFlow.BYOD.value + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "byod" + ) openai_client_mock = azure_openai_mock.return_value openai_client_mock.chat.completions.create.return_value = ( self.mock_streamed_response @@ -691,8 +714,12 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da } @patch("create_app.AzureOpenAI") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_azure_byod_returns_correct_response_when_not_streaming_with_data( self, + get_active_config_or_default_mock, azure_openai_mock: MagicMock, env_helper_mock: MagicMock, client: FlaskClient, @@ -700,7 +727,9 @@ def test_conversation_azure_byod_returns_correct_response_when_not_streaming_wit """Test that the Azure BYOD conversation endpoint returns the correct response.""" # given env_helper_mock.SHOULD_STREAM = False - env_helper_mock.CONVERSATION_FLOW = ConversationFlow.BYOD.value + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "byod" + ) openai_client_mock = azure_openai_mock.return_value openai_client_mock.chat.completions.create.return_value = self.mock_response @@ -738,13 +767,21 @@ def test_conversation_azure_byod_returns_correct_response_when_not_streaming_wit } @patch("create_app.conversation_with_data") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_azure_byod_returns_500_when_exception_occurs( - self, conversation_with_data_mock, env_helper_mock, client + self, + get_active_config_or_default_mock, + conversation_with_data_mock, + client, ): """Test that an error response is returned when an exception occurs.""" # given conversation_with_data_mock.side_effect = Exception("Test exception") - env_helper_mock.CONVERSATION_FLOW = ConversationFlow.BYOD.value + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "byod" + ) # when response = client.post( @@ -760,8 +797,14 @@ def test_conversation_azure_byod_returns_500_when_exception_occurs( } @patch("create_app.conversation_with_data") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_azure_byod_returns_500_when_internalservererror_occurs( - self, conversation_with_data_mock, env_helper_mock, client + self, + get_active_config_or_default_mock, + conversation_with_data_mock, + client, ): """Test that an error response is returned when an exception occurs.""" # given @@ -770,7 +813,9 @@ def test_conversation_azure_byod_returns_500_when_internalservererror_occurs( conversation_with_data_mock.side_effect = InternalServerError( "Test exception", response=response_mock, body="" ) - env_helper_mock.CONVERSATION_FLOW = ConversationFlow.BYOD.value + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "byod" + ) # when response = client.post( @@ -787,8 +832,14 @@ def test_conversation_azure_byod_returns_500_when_internalservererror_occurs( } @patch("create_app.conversation_with_data") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_azure_byod_returns_429_on_rate_limit_error( - self, conversation_with_data_mock, env_helper_mock, client + self, + get_active_config_or_default_mock, + conversation_with_data_mock, + client, ): """Test that a 429 response is returned on RateLimitError for BYOD conversation.""" # given @@ -807,7 +858,9 @@ def test_conversation_azure_byod_returns_429_on_rate_limit_error( conversation_with_data_mock.side_effect = BadRequestError( message="Error code: 400", response=response_mock, body="" ) - env_helper_mock.CONVERSATION_FLOW = ConversationFlow.BYOD.value + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "byod" + ) # when response = client.post( @@ -824,14 +877,23 @@ def test_conversation_azure_byod_returns_429_on_rate_limit_error( } @patch("create_app.AzureOpenAI") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_azure_byod_returns_correct_response_when_not_streaming_without_data_keys( - self, azure_openai_mock, env_helper_mock, client + self, + get_active_config_or_default_mock, + azure_openai_mock, + env_helper_mock, + client, ): """Test that the Azure BYOD conversation endpoint returns the correct response.""" # given env_helper_mock.should_use_data.return_value = False env_helper_mock.SHOULD_STREAM = False - env_helper_mock.CONVERSATION_FLOW = ConversationFlow.BYOD.value + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "byod" + ) openai_client_mock = MagicMock() azure_openai_mock.return_value = openai_client_mock @@ -889,8 +951,15 @@ def test_conversation_azure_byod_returns_correct_response_when_not_streaming_wit ) @patch("create_app.AzureOpenAI") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_azure_byod_returns_correct_response_when_not_streaming_without_data_rbac( - self, azure_openai_mock, env_helper_mock, client + self, + get_active_config_or_default_mock, + azure_openai_mock, + env_helper_mock, + client, ): """Test that the Azure BYOD conversation endpoint returns the correct response.""" # given @@ -898,7 +967,9 @@ def test_conversation_azure_byod_returns_correct_response_when_not_streaming_wit env_helper_mock.SHOULD_STREAM = False env_helper_mock.AZURE_AUTH_TYPE = "rbac" env_helper_mock.AZURE_OPENAI_STOP_SEQUENCE = "" - env_helper_mock.CONVERSATION_FLOW = ConversationFlow.BYOD.value + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "byod" + ) openai_client_mock = MagicMock() azure_openai_mock.return_value = openai_client_mock @@ -956,13 +1027,22 @@ def test_conversation_azure_byod_returns_correct_response_when_not_streaming_wit ) @patch("create_app.AzureOpenAI") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_azure_byod_returns_correct_response_when_streaming_without_data( - self, azure_openai_mock, env_helper_mock, client + self, + get_active_config_or_default_mock, + azure_openai_mock, + env_helper_mock, + client, ): """Test that the Azure BYOD conversation endpoint returns the correct response.""" # given env_helper_mock.should_use_data.return_value = False - env_helper_mock.CONVERSATION_FLOW = ConversationFlow.BYOD.value + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "byod" + ) openai_client_mock = MagicMock() azure_openai_mock.return_value = openai_client_mock @@ -994,19 +1074,24 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_without ) @patch("create_app.AzureOpenAI") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_azure_byod_uses_semantic_config( self, + get_active_config_or_default_mock, azure_openai_mock: MagicMock, - env_helper_mock: MagicMock, client: FlaskClient, ): """Test that the Azure BYOD conversation endpoint uses the semantic configuration.""" # given + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "byod" + ) openai_client_mock = azure_openai_mock.return_value openai_client_mock.chat.completions.create.return_value = ( self.mock_streamed_response ) - env_helper_mock.CONVERSATION_FLOW = ConversationFlow.BYOD.value # when response = client.post( From c035faabee611f34b7d824866073983b5ee39f21 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Mon, 2 Sep 2024 11:48:46 +0530 Subject: [PATCH 10/28] app test cases updated --- code/create_app.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/code/create_app.py b/code/create_app.py index 48c7f83fb..f7c993a4a 100644 --- a/code/create_app.py +++ b/code/create_app.py @@ -21,8 +21,6 @@ from backend.batch.utilities.helpers.config.conversation_flow import ConversationFlow from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient from azure.identity import DefaultAzureCredential -from backend.batch.utilities.tools.question_answer_tool import QuestionAnswerTool -from backend.batch.utilities.parser.output_parser_tool import OutputParserTool from backend.batch.utilities.helpers.azure_blob_storage_client import ( AzureBlobStorageClient, ) @@ -211,27 +209,31 @@ def conversation_with_data(conversation: Request, env_helper: EnvHelper): ) if not env_helper.SHOULD_STREAM: - user_messages = list( - filter( - lambda x: x["role"] in ("user", "assistant"), - conversation.json["messages"][0:-1], - ) - ) - answer = QuestionAnswerTool().answer_question( - messages[-1]["content"], user_messages - ) - # Format the output for the UI - messages = OutputParserTool().parse( - question=answer.question, - answer=answer.answer, - source_documents=answer.source_documents, - ) + citations = get_citations(response.choices[0].message.model_extra["context"]) response_obj = { "id": response.id, "model": response.model, "created": response.created, "object": response.object, - "choices": [{"messages": messages}], + "choices": [ + { + "messages": [ + { + "content": json.dumps( + citations, + ensure_ascii=False, + ), + "end_turn": False, + "role": "tool", + }, + { + "end_turn": True, + "content": response.choices[0].message.content, + "role": "assistant", + }, + ] + } + ], } return response_obj From a1e235ce3f3c9a319b4ec22f494744f63c91da03 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Tue, 3 Sep 2024 10:19:53 +0530 Subject: [PATCH 11/28] test app tested cases updated --- code/tests/test_app.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/code/tests/test_app.py b/code/tests/test_app.py index 51e9d4564..a6ef09248 100644 --- a/code/tests/test_app.py +++ b/code/tests/test_app.py @@ -278,8 +278,12 @@ def test_conversation_custom_returns_correct_response( @patch("create_app.get_message_orchestrator") @patch("create_app.get_orchestrator_config") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_custom_calls_message_orchestrator_correctly( self, + get_active_config_or_default_mock, get_orchestrator_config_mock, get_message_orchestrator_mock, env_helper_mock, @@ -287,6 +291,9 @@ def test_conversation_custom_calls_message_orchestrator_correctly( ): """Test that the custom conversation endpoint calls the message orchestrator correctly.""" # given + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "custom" + ) get_orchestrator_config_mock.return_value = self.orchestrator_config message_orchestrator_mock = AsyncMock() @@ -332,11 +339,17 @@ def test_conversaation_custom_returns_error_response_on_exception( } @patch("create_app.get_orchestrator_config") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_custom_returns_error_response_on_rate_limit_error( - self, get_orchestrator_config_mock, env_helper_mock, client + self, get_active_config_or_default_mock, get_orchestrator_config_mock, client ): """Test that a 429 response is returned on RateLimitError.""" # given + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "custom" + ) response_mock = Mock() response_mock.status_code = 429 response_mock.json.return_value = { @@ -397,8 +410,12 @@ def test_conversation_custom_returns_500_when_internalservererror_occurs( @patch("create_app.get_message_orchestrator") @patch("create_app.get_orchestrator_config") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_custom_allows_multiple_messages_from_user( self, + get_active_config_or_default_mock, get_orchestrator_config_mock, get_message_orchestrator_mock, client, @@ -406,6 +423,9 @@ def test_conversation_custom_allows_multiple_messages_from_user( """This can happen if there was an error getting a response from the assistant for the previous user message.""" # given + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "custom" + ) get_orchestrator_config_mock.return_value = self.orchestrator_config message_orchestrator_mock = AsyncMock() @@ -497,7 +517,7 @@ def setup_method(self): { "content": "content", "title": "title", - "url": '{"id": "doc_id", "source": "https://strgwchoykpenmmu.blob.core.windows.net/documents/MSFT_FY23Q4_10K.docx_SAS_TOKEN_PLACEHOLDER_", "title": "/documents/MSFT_FY23Q4_10K.docx", "chunk": 46, "chunk_id": null}', + "url": '{"id": "doc_id", "source": "source", "title": "title", "chunk": 46, "chunk_id": null}', } ], "intent": "intent", @@ -524,7 +544,7 @@ def setup_method(self): { "content": "content", "title": "title", - "url": '{"id": "doc_id", "source": "https://strgwchoykpenmmu.blob.core.windows.net/documents/MSFT_FY23Q4_10K.docx_SAS_TOKEN_PLACEHOLDER_", "title": "/documents/MSFT_FY23Q4_10K.docx", "chunk": 46, "chunk_id": null}', + "url": '{"id": "doc_id", "source": "source", "title": "title", "chunk": 46, "chunk_id": null}', } ], "intent": "intent", From d7cbb3dc41e61fbeb6337b0727e3a8421f331ea8 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Tue, 3 Sep 2024 14:46:46 +0530 Subject: [PATCH 12/28] updated env helper --- code/backend/batch/utilities/helpers/env_helper.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/code/backend/batch/utilities/helpers/env_helper.py b/code/backend/batch/utilities/helpers/env_helper.py index 0cff6b52d..eb17761bc 100644 --- a/code/backend/batch/utilities/helpers/env_helper.py +++ b/code/backend/batch/utilities/helpers/env_helper.py @@ -155,7 +155,7 @@ def __load_config(self, **kwargs) -> None: # Set env for Azure OpenAI self.AZURE_OPENAI_ENDPOINT = os.environ.get( "AZURE_OPENAI_ENDPOINT", - f"https://{self.AZURE_OPENAI_RESOURCE}.openai.azure.com/", + f"https://{self.AZURE_OPENAI_RESOURCE}.openai.azure.com", ) # Set env for OpenAI SDK @@ -211,10 +211,6 @@ def __load_config(self, **kwargs) -> None: self.ORCHESTRATION_STRATEGY = os.getenv( "ORCHESTRATION_STRATEGY", "openai_function" ) - # Conversation Type - which chooses between custom or byod - self.CONVERSATION_FLOW = os.getenv( - "CONVERSATION_FLOW", ConversationFlow.CUSTOM.value - ) # Speech Service self.AZURE_SPEECH_SERVICE_NAME = os.getenv("AZURE_SPEECH_SERVICE_NAME", "") self.AZURE_SPEECH_SERVICE_REGION = os.getenv("AZURE_SPEECH_SERVICE_REGION") From d69b2cab67c2e8649c25a77b898e3b62125fa7e0 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Tue, 3 Sep 2024 15:44:06 +0530 Subject: [PATCH 13/28] updated env helper --- code/backend/batch/utilities/helpers/env_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/backend/batch/utilities/helpers/env_helper.py b/code/backend/batch/utilities/helpers/env_helper.py index eb17761bc..d1f166f4e 100644 --- a/code/backend/batch/utilities/helpers/env_helper.py +++ b/code/backend/batch/utilities/helpers/env_helper.py @@ -155,7 +155,7 @@ def __load_config(self, **kwargs) -> None: # Set env for Azure OpenAI self.AZURE_OPENAI_ENDPOINT = os.environ.get( "AZURE_OPENAI_ENDPOINT", - f"https://{self.AZURE_OPENAI_RESOURCE}.openai.azure.com", + f"https://{self.AZURE_OPENAI_RESOURCE}.openai.azure.com/", ) # Set env for OpenAI SDK From fbebb826aef3d980af1a5b048d527cee11f543ca Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Tue, 3 Sep 2024 17:16:24 +0530 Subject: [PATCH 14/28] test conversastion updated --- .../tests/backend_api/default/test_conversation.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/code/tests/functional/tests/backend_api/default/test_conversation.py b/code/tests/functional/tests/backend_api/default/test_conversation.py index b86feda1c..2f826ad08 100644 --- a/code/tests/functional/tests/backend_api/default/test_conversation.py +++ b/code/tests/functional/tests/backend_api/default/test_conversation.py @@ -176,16 +176,11 @@ def test_post_makes_correct_calls_to_openai_embeddings_to_embed_question_to_sear ) -@patch( - "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" -) def test_post_makes_correct_calls_to_openai_embeddings_to_embed_question_to_store_in_conversation_log( - get_active_config_or_default_mock, app_url: str, app_config: AppConfig, httpserver: HTTPServer, ): - get_active_config_or_default_mock.prompts.conversational_flow = "custom" # when requests.post(f"{app_url}{path}", json=body) @@ -657,9 +652,13 @@ def test_post_makes_correct_call_to_store_conversation_in_search( ) +@patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" +) def test_post_returns_error_when_downstream_fails( - app_url: str, app_config: AppConfig, httpserver: HTTPServer + get_active_config_or_default_mock, app_url: str, httpserver: HTTPServer ): + get_active_config_or_default_mock.return_value.prompts.conversational_flow = "custom" httpserver.expect_oneshot_request( re.compile(".*"), ).respond_with_json({}, status=403) From 9e05659454513f106ded6cb9bee8ce497d4aad08 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Tue, 3 Sep 2024 19:07:14 +0530 Subject: [PATCH 15/28] test conversastion updated --- code/create_app.py | 6 ++++- .../with_byod/test_conversation_flow.py | 24 ++++++++++++------- code/tests/request_matching.py | 10 ++++---- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/code/create_app.py b/code/create_app.py index f7c993a4a..5e5c47de5 100644 --- a/code/create_app.py +++ b/code/create_app.py @@ -46,7 +46,11 @@ def get_citations(citation_list): citations_dict = {"citations": []} for citation in citation_list.get("citations"): - metadata = json.loads(citation["url"]) + metadata = ( + json.loads(citation["url"]) + if isinstance(citation["url"], str) + else citation["url"] + ) title = citation["title"] url = get_markdown_url(metadata["source"], title) citations_dict["citations"].append( diff --git a/code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py b/code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py index ef3b29c2c..340a697ae 100644 --- a/code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py +++ b/code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py @@ -31,7 +31,7 @@ def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig): method="POST", ).respond_with_data( Template( - r"""data: {"id":"92f715be-cfc4-4ae6-80f8-c86b7955f6af","model":"$model","created":1712077271,"object":"extensions.chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant","context":{"citations":[{"content":"document","title":"/documents/doc.pdf","url":null,"filepath":null,"chunk_id":"0"}],"intent":"[\"intent\"]"}},"end_turn":false,"finish_reason":null}]} + r"""data: {"id":"92f715be-cfc4-4ae6-80f8-c86b7955f6af","model":"$model","created":1712077271,"object":"extensions.chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant","context":{"citations":[{"content":"document","title":"/documents/doc.pdf","url":{"id": "id", "source": "source", "title": "title", "chunk": 46, "chunk_id": null},"filepath":null,"chunk_id":"0"}],"intent":"[\"intent\"]"}},"end_turn":false,"finish_reason":null}]} data: {"id":"92f715be-cfc4-4ae6-80f8-c86b7955f6af","model":"$model","created":1712077271,"object":"extensions.chat.completion.chunk","choices":[{"index":0,"delta":{"content":"42 is the meaning of life"},"end_turn":false,"finish_reason":null}],"system_fingerprint":"fp_68a7d165bf"} @@ -51,14 +51,12 @@ def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig): "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" ) def test_azure_byod_responds_successfully_when_streaming( + get_active_config_or_default_mock, app_url: str, app_config: AppConfig, - httpserver: HTTPServer, - get_active_config_or_default_mock, ): - get_active_config_or_default_mock.return_value.prompts.return_value = { - "conversational_flow": "byod" - } + get_active_config_or_default_mock.return_value.prompts.conversational_flow = "byod" + # when response = requests.post(f"{app_url}{path}", json=body) @@ -79,7 +77,7 @@ def test_azure_byod_responds_successfully_when_streaming( { "messages": [ { - "content": r'{"citations": [{"content": "document", "title": "/documents/doc.pdf", "url": null, "filepath": null, "chunk_id": "0"}], "intent": "[\"intent\"]"}', + "content": '{"citations": [{"content": "[/documents/doc.pdf](source)\\n\\n\\ndocument", "id": "id", "chunk_id": 46, "title": "/documents/doc.pdf", "filepath": "doc.pdf", "url": "[/documents/doc.pdf](source)"}]}', "end_turn": False, "role": "tool", }, @@ -94,9 +92,17 @@ def test_azure_byod_responds_successfully_when_streaming( } +@patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" +) def test_post_makes_correct_call_to_azure_openai( - app_url: str, app_config: AppConfig, httpserver: HTTPServer + get_active_config_or_default_mock, + app_url: str, + app_config: AppConfig, + httpserver: HTTPServer, ): + get_active_config_or_default_mock.return_value.prompts.conversational_flow = "byod" + # when requests.post(f"{app_url}{path}", json=body) @@ -128,7 +134,7 @@ def test_post_makes_correct_call_to_azure_openai( ) ], "title_field": "title", - "url_field": "url", + "url_field": "source", "filepath_field": "filepath", }, "filter": app_config.get("AZURE_SEARCH_FILTER"), diff --git a/code/tests/request_matching.py b/code/tests/request_matching.py index 2cb91fc98..c65852155 100644 --- a/code/tests/request_matching.py +++ b/code/tests/request_matching.py @@ -46,11 +46,11 @@ def verify_request_made( if request.method != request_matcher.method: continue - if ( - request_matcher.json is not None - and request.json != request_matcher.json - ): - continue + # if ( + # request_matcher.json is not None + # and request.json != request_matcher.json + # ): + # continue if request_matcher.headers is not None and not contains_all_headers( request_matcher, request From fd8fc0604bb47a044c08b8f9e6b004a4cbfb41d2 Mon Sep 17 00:00:00 2001 From: AjitPadhi-Microsoft Date: Tue, 3 Sep 2024 19:41:56 +0530 Subject: [PATCH 16/28] Update request_matching.py --- code/tests/request_matching.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/code/tests/request_matching.py b/code/tests/request_matching.py index c65852155..2cb91fc98 100644 --- a/code/tests/request_matching.py +++ b/code/tests/request_matching.py @@ -46,11 +46,11 @@ def verify_request_made( if request.method != request_matcher.method: continue - # if ( - # request_matcher.json is not None - # and request.json != request_matcher.json - # ): - # continue + if ( + request_matcher.json is not None + and request.json != request_matcher.json + ): + continue if request_matcher.headers is not None and not contains_all_headers( request_matcher, request From 94cd582eed3115e40d2d8591215ac25469d82add Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Tue, 3 Sep 2024 22:09:42 +0530 Subject: [PATCH 17/28] test app unit test updated --- code/tests/test_app.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/code/tests/test_app.py b/code/tests/test_app.py index a6ef09248..5dac1e2e8 100644 --- a/code/tests/test_app.py +++ b/code/tests/test_app.py @@ -623,9 +623,9 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da data = str(response.data, "utf-8") assert ( data - == r"""{"id": "response.id", "model": "mock-openai-model", "created": 0, "object": "response.object", "choices": [{"messages": [{"content": "{\"citations\": [{\"content\": \"content\", \"title\": \"title\"}], \"intent\": \"intent\"}", "end_turn": false, "role": "tool"}, {"content": "", "end_turn": false, "role": "assistant"}]}]} -{"id": "response.id", "model": "mock-openai-model", "created": 0, "object": "response.object", "choices": [{"messages": [{"content": "{\"citations\": [{\"content\": \"content\", \"title\": \"title\"}], \"intent\": \"intent\"}", "end_turn": false, "role": "tool"}, {"content": "A question\n?", "end_turn": false, "role": "assistant"}]}]} -{"id": "response.id", "model": "mock-openai-model", "created": 0, "object": "response.object", "choices": [{"messages": [{"content": "{\"citations\": [{\"content\": \"content\", \"title\": \"title\"}], \"intent\": \"intent\"}", "end_turn": false, "role": "tool"}, {"content": "A question\n?", "end_turn": true, "role": "assistant"}]}]} + == r"""{"id": "response.id", "model": "mock-openai-model", "created": 0, "object": "response.object", "choices": [{"messages": [{"content": "{\"citations\": [{\"content\": \"[title](source)\\n\\n\\ncontent\", \"id\": \"doc_id\", \"chunk_id\": 46, \"title\": \"title\", \"filepath\": \"title\", \"url\": \"[title](source)\"}]}", "end_turn": false, "role": "tool"}, {"content": "", "end_turn": false, "role": "assistant"}]}]} +{"id": "response.id", "model": "mock-openai-model", "created": 0, "object": "response.object", "choices": [{"messages": [{"content": "{\"citations\": [{\"content\": \"[title](source)\\n\\n\\ncontent\", \"id\": \"doc_id\", \"chunk_id\": 46, \"title\": \"title\", \"filepath\": \"title\", \"url\": \"[title](source)\"}]}", "end_turn": false, "role": "tool"}, {"content": "A question\n?", "end_turn": false, "role": "assistant"}]}]} +{"id": "response.id", "model": "mock-openai-model", "created": 0, "object": "response.object", "choices": [{"messages": [{"content": "{\"citations\": [{\"content\": \"[title](source)\\n\\n\\ncontent\", \"id\": \"doc_id\", \"chunk_id\": 46, \"title\": \"title\", \"filepath\": \"title\", \"url\": \"[title](source)\"}]}", "end_turn": false, "role": "tool"}, {"content": "A question\n?", "end_turn": true, "role": "assistant"}]}]} """ ) @@ -713,9 +713,9 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da data = str(response.data, "utf-8") assert ( data - == r"""{"id": "response.id", "model": "mock-openai-model", "created": 0, "object": "response.object", "choices": [{"messages": [{"content": "{\"citations\": [{\"content\": \"content\", \"title\": \"title\"}], \"intent\": \"intent\"}", "end_turn": false, "role": "tool"}, {"content": "", "end_turn": false, "role": "assistant"}]}]} -{"id": "response.id", "model": "mock-openai-model", "created": 0, "object": "response.object", "choices": [{"messages": [{"content": "{\"citations\": [{\"content\": \"content\", \"title\": \"title\"}], \"intent\": \"intent\"}", "end_turn": false, "role": "tool"}, {"content": "A question\n?", "end_turn": false, "role": "assistant"}]}]} -{"id": "response.id", "model": "mock-openai-model", "created": 0, "object": "response.object", "choices": [{"messages": [{"content": "{\"citations\": [{\"content\": \"content\", \"title\": \"title\"}], \"intent\": \"intent\"}", "end_turn": false, "role": "tool"}, {"content": "A question\n?", "end_turn": true, "role": "assistant"}]}]} + == r"""{"id": "response.id", "model": "mock-openai-model", "created": 0, "object": "response.object", "choices": [{"messages": [{"content": "{\"citations\": [{\"content\": \"[title](source)\\n\\n\\ncontent\", \"id\": \"doc_id\", \"chunk_id\": 46, \"title\": \"title\", \"filepath\": \"title\", \"url\": \"[title](source)\"}]}", "end_turn": false, "role": "tool"}, {"content": "", "end_turn": false, "role": "assistant"}]}]} +{"id": "response.id", "model": "mock-openai-model", "created": 0, "object": "response.object", "choices": [{"messages": [{"content": "{\"citations\": [{\"content\": \"[title](source)\\n\\n\\ncontent\", \"id\": \"doc_id\", \"chunk_id\": 46, \"title\": \"title\", \"filepath\": \"title\", \"url\": \"[title](source)\"}]}", "end_turn": false, "role": "tool"}, {"content": "A question\n?", "end_turn": false, "role": "assistant"}]}]} +{"id": "response.id", "model": "mock-openai-model", "created": 0, "object": "response.object", "choices": [{"messages": [{"content": "{\"citations\": [{\"content\": \"[title](source)\\n\\n\\ncontent\", \"id\": \"doc_id\", \"chunk_id\": 46, \"title\": \"title\", \"filepath\": \"title\", \"url\": \"[title](source)\"}]}", "end_turn": false, "role": "tool"}, {"content": "A question\n?", "end_turn": true, "role": "assistant"}]}]} """ ) @@ -772,7 +772,7 @@ def test_conversation_azure_byod_returns_correct_response_when_not_streaming_wit { "messages": [ { - "content": '{"citations": [{"content": "content", "title": "title"}], "intent": "intent"}', + "content": '{"citations": [{"content": "[title](source)\\n\\n\\ncontent", "id": "doc_id", "chunk_id": 46, "title": "title", "filepath": "title", "url": "[title](source)"}]}', "end_turn": False, "role": "tool", }, From 6a10b355295ffc402df3bd0b4a4ae6861c78cfec Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Tue, 3 Sep 2024 22:22:29 +0530 Subject: [PATCH 18/28] test app unit test updated --- code/tests/test_app.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/code/tests/test_app.py b/code/tests/test_app.py index 5dac1e2e8..f9a6a5862 100644 --- a/code/tests/test_app.py +++ b/code/tests/test_app.py @@ -318,11 +318,17 @@ def test_conversation_custom_calls_message_orchestrator_correctly( ) @patch("create_app.get_orchestrator_config") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversaation_custom_returns_error_response_on_exception( - self, get_orchestrator_config_mock, env_helper_mock, client + self, get_active_config_or_default_mock, get_orchestrator_config_mock, client ): """Test that an error response is returned when an exception occurs.""" # given + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "custom" + ) get_orchestrator_config_mock.side_effect = Exception("An error occurred") # when @@ -384,7 +390,7 @@ def test_conversation_custom_returns_error_response_on_rate_limit_error( @patch("create_app.get_orchestrator_config") def test_conversation_custom_returns_500_when_internalservererror_occurs( - self, get_orchestrator_config_mock, env_helper_mock, client + self, get_orchestrator_config_mock, client ): """Test that an error response is returned when an exception occurs.""" # given From 560456c6aaeb95b0fafe1e0591f7be55eac6f049 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Tue, 3 Sep 2024 22:39:56 +0530 Subject: [PATCH 19/28] test app unit test updated --- code/tests/test_app.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/code/tests/test_app.py b/code/tests/test_app.py index f9a6a5862..3d90e7a43 100644 --- a/code/tests/test_app.py +++ b/code/tests/test_app.py @@ -389,11 +389,17 @@ def test_conversation_custom_returns_error_response_on_rate_limit_error( } @patch("create_app.get_orchestrator_config") + @patch( + "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" + ) def test_conversation_custom_returns_500_when_internalservererror_occurs( - self, get_orchestrator_config_mock, client + self, get_active_config_or_default_mock, get_orchestrator_config_mock, client ): """Test that an error response is returned when an exception occurs.""" # given + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "custom" + ) response_mock = MagicMock() response_mock.status_code = 500 get_orchestrator_config_mock.side_effect = InternalServerError( From a97feabdb6d8e9809f343fe5e43691287f4f3946 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Wed, 4 Sep 2024 13:21:02 +0530 Subject: [PATCH 20/28] unit test issue fixed --- .../tests/backend_api/with_byod/test_conversation_flow.py | 8 ++++---- code/tests/test_app.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py b/code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py index 340a697ae..c0e5537fe 100644 --- a/code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py +++ b/code/tests/functional/tests/backend_api/with_byod/test_conversation_flow.py @@ -129,12 +129,12 @@ def test_post_makes_correct_call_to_azure_openai( "fields_mapping": { "content_fields": ["content"], "vector_fields": [ - app_config.get( - "AZURE_SEARCH_CONTENT_VECTOR_COLUMN" - ) + app_config.get("AZURE_SEARCH_CONTENT_VECTOR_COLUMN") ], "title_field": "title", - "url_field": "source", + "url_field": app_config.get( + "AZURE_SEARCH_FIELDS_METADATA" + ), "filepath_field": "filepath", }, "filter": app_config.get("AZURE_SEARCH_FILTER"), diff --git a/code/tests/test_app.py b/code/tests/test_app.py index 3d90e7a43..2c5aaade9 100644 --- a/code/tests/test_app.py +++ b/code/tests/test_app.py @@ -608,6 +608,7 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da self, get_active_config_or_default_mock, azure_openai_mock: MagicMock, + env_helper_mock, client: FlaskClient, ): """Test that the Azure BYOD conversation endpoint returns the correct response.""" @@ -670,7 +671,7 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da "content_fields": ["field1", "field2"], "vector_fields": [AZURE_SEARCH_CONTENT_VECTOR_COLUMN], "title_field": AZURE_SEARCH_TITLE_COLUMN, - "url_field": AZURE_SEARCH_URL_COLUMN, + "url_field": env_helper_mock.AZURE_SEARCH_FIELDS_METADATA, "filepath_field": AZURE_SEARCH_FILENAME_COLUMN, }, "filter": AZURE_SEARCH_FILTER, From f11113a13ca45915b8e5aa4f0841b6060cb59380 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Wed, 4 Sep 2024 14:10:10 +0530 Subject: [PATCH 21/28] lint issue fix --- .../utilities/helpers/config/config_helper.py | 15 ++++++++++----- .../backend/batch/utilities/helpers/env_helper.py | 9 ++++++--- .../backend_api/default/test_conversation.py | 6 ++++-- code/tests/test_app.py | 3 +-- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/code/backend/batch/utilities/helpers/config/config_helper.py b/code/backend/batch/utilities/helpers/config/config_helper.py index f88c6f4d3..2a7b3145c 100644 --- a/code/backend/batch/utilities/helpers/config/config_helper.py +++ b/code/backend/batch/utilities/helpers/config/config_helper.py @@ -171,7 +171,9 @@ def _set_new_config_properties(config: dict, default_config: dict): config["example"] = default_config["example"] if config["prompts"].get("ai_assistant_type") is None: - config["prompts"]["ai_assistant_type"] = default_config["prompts"]["ai_assistant_type"] + config["prompts"]["ai_assistant_type"] = default_config["prompts"][ + "ai_assistant_type" + ] if config.get("integrated_vectorization_config") is None: config["integrated_vectorization_config"] = default_config[ @@ -179,7 +181,9 @@ def _set_new_config_properties(config: dict, default_config: dict): ] if config["prompts"].get("conversational_flow") is None: - config["prompts"]["conversational_flow"] = default_config["prompts"]["conversational_flow"] + config["prompts"]["conversational_flow"] = default_config["prompts"][ + "conversational_flow" + ] @staticmethod @functools.cache @@ -207,7 +211,6 @@ def get_default_assistant_prompt(): config = ConfigHelper.get_default_config() return config["prompts"]["answering_user_prompt"] - @staticmethod def save_config_as_active(config): ConfigHelper.validate_config(config) @@ -256,12 +259,14 @@ def get_default_config(): @staticmethod @functools.cache def get_default_contract_assistant(): - contract_file_path = os.path.join(os.path.dirname(__file__), "default_contract_assistant_prompt.txt") + contract_file_path = os.path.join( + os.path.dirname(__file__), "default_contract_assistant_prompt.txt" + ) contract_assistant = "" with open(contract_file_path, encoding="utf-8") as f: contract_assistant = f.readlines() - return ''.join([str(elem) for elem in contract_assistant]) + return "".join([str(elem) for elem in contract_assistant]) @staticmethod def clear_config(): diff --git a/code/backend/batch/utilities/helpers/env_helper.py b/code/backend/batch/utilities/helpers/env_helper.py index d1f166f4e..873bce6ec 100644 --- a/code/backend/batch/utilities/helpers/env_helper.py +++ b/code/backend/batch/utilities/helpers/env_helper.py @@ -4,7 +4,6 @@ from dotenv import load_dotenv from azure.identity import DefaultAzureCredential, get_bearer_token_provider from azure.keyvault.secrets import SecretClient -from .config.conversation_flow import ConversationFlow logger = logging.getLogger(__name__) @@ -69,9 +68,13 @@ def __load_config(self, **kwargs) -> None: self.AZURE_SEARCH_FIELDS_METADATA = os.getenv( "AZURE_SEARCH_FIELDS_METADATA", "metadata" ) - self.AZURE_SEARCH_SOURCE_COLUMN = os.getenv("AZURE_SEARCH_SOURCE_COLUMN", "source") + self.AZURE_SEARCH_SOURCE_COLUMN = os.getenv( + "AZURE_SEARCH_SOURCE_COLUMN", "source" + ) self.AZURE_SEARCH_CHUNK_COLUMN = os.getenv("AZURE_SEARCH_CHUNK_COLUMN", "chunk") - self.AZURE_SEARCH_OFFSET_COLUMN = os.getenv("AZURE_SEARCH_OFFSET_COLUMN", "offset") + self.AZURE_SEARCH_OFFSET_COLUMN = os.getenv( + "AZURE_SEARCH_OFFSET_COLUMN", "offset" + ) self.AZURE_SEARCH_CONVERSATIONS_LOG_INDEX = os.getenv( "AZURE_SEARCH_CONVERSATIONS_LOG_INDEX", "conversations" ) diff --git a/code/tests/functional/tests/backend_api/default/test_conversation.py b/code/tests/functional/tests/backend_api/default/test_conversation.py index 2f826ad08..645529fe7 100644 --- a/code/tests/functional/tests/backend_api/default/test_conversation.py +++ b/code/tests/functional/tests/backend_api/default/test_conversation.py @@ -2,7 +2,7 @@ import re import pytest from pytest_httpserver import HTTPServer -from unittest.mock import ANY, MagicMock, patch +from unittest.mock import patch import requests from tests.request_matching import ( @@ -658,7 +658,9 @@ def test_post_makes_correct_call_to_store_conversation_in_search( def test_post_returns_error_when_downstream_fails( get_active_config_or_default_mock, app_url: str, httpserver: HTTPServer ): - get_active_config_or_default_mock.return_value.prompts.conversational_flow = "custom" + get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( + "custom" + ) httpserver.expect_oneshot_request( re.compile(".*"), ).respond_with_json({}, status=403) diff --git a/code/tests/test_app.py b/code/tests/test_app.py index 2c5aaade9..c272167c0 100644 --- a/code/tests/test_app.py +++ b/code/tests/test_app.py @@ -214,7 +214,6 @@ class TestConversationCustom: def setup_method(self): """Set up the test data.""" self.orchestrator_config = {"strategy": "langchain"} - self.conversastion_flow = {"conversation_flow": "custom"} self.messages = [ { "content": '{"citations": [], "intent": "A question?"}', @@ -608,7 +607,7 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da self, get_active_config_or_default_mock, azure_openai_mock: MagicMock, - env_helper_mock, + env_helper_mock: MagicMock, client: FlaskClient, ): """Test that the Azure BYOD conversation endpoint returns the correct response.""" From 08cd33be1b447745948220d36b4574ec0a31bcb5 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Thu, 5 Sep 2024 12:17:36 +0530 Subject: [PATCH 22/28] updated admin tab --- code/backend/Admin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/code/backend/Admin.py b/code/backend/Admin.py index 627996263..373560ffc 100644 --- a/code/backend/Admin.py +++ b/code/backend/Admin.py @@ -51,6 +51,7 @@ def load_css(file_path): """ * If you want to ingest data (pdf, websites, etc.), then use the `Ingest Data` tab * If you want to explore how your data was chunked, check the `Explore Data` tab + * If you want to delete your data, check the `Delete Data` tab * If you want to adapt the underlying prompts, logging settings and others, use the `Configuration` tab """ ) From 20cbca82a439e879c78e19be8b721eb468ec5912 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Fri, 6 Sep 2024 16:26:34 +0530 Subject: [PATCH 23/28] updated citation in byod --- code/create_app.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/code/create_app.py b/code/create_app.py index 5e5c47de5..896db501c 100644 --- a/code/create_app.py +++ b/code/create_app.py @@ -30,20 +30,19 @@ logger = logging.getLogger(__name__) -def get_markdown_url(source, title): +def get_markdown_url(source, title, container_sas): """Get Markdown URL for a citation""" url = quote(source, safe=":/") if "_SAS_TOKEN_PLACEHOLDER_" in url: - blob_client = AzureBlobStorageClient() - container_sas = blob_client.get_container_sas() url = url.replace("_SAS_TOKEN_PLACEHOLDER_", container_sas) return f"[{title}]({url})" def get_citations(citation_list): """Returns Formated Citations""" - + blob_client = AzureBlobStorageClient() + container_sas = blob_client.get_container_sas() citations_dict = {"citations": []} for citation in citation_list.get("citations"): metadata = ( @@ -52,7 +51,7 @@ def get_citations(citation_list): else citation["url"] ) title = citation["title"] - url = get_markdown_url(metadata["source"], title) + url = get_markdown_url(metadata["source"], title, container_sas) citations_dict["citations"].append( { "content": url + "\n\n\n" + citation["content"], From a78f8b7d44f5c8e5f0875e7cb0810782f60846a7 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Fri, 6 Sep 2024 16:50:49 +0530 Subject: [PATCH 24/28] lint issue fix --- code/tests/test_app.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/code/tests/test_app.py b/code/tests/test_app.py index c272167c0..2cc32208a 100644 --- a/code/tests/test_app.py +++ b/code/tests/test_app.py @@ -599,6 +599,16 @@ def setup_method(self): ), ] + @pytest.fixture(autouse=True) + def azure_blob_service_mock(): + with patch( + "backend.batch.utilities.tools.question_answer_tool.AzureBlobStorageClient" + ) as mock: + blob_helper = mock.return_value + blob_helper.get_container_sas.return_value = "mock sas" + + yield blob_helper + @patch("create_app.AzureOpenAI") @patch( "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" From 11e18c94971eb8e9bb3ab71b261975876342be5a Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Fri, 6 Sep 2024 17:21:52 +0530 Subject: [PATCH 25/28] lint issue fix --- code/tests/test_app.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/code/tests/test_app.py b/code/tests/test_app.py index 2cc32208a..30344b4a3 100644 --- a/code/tests/test_app.py +++ b/code/tests/test_app.py @@ -89,6 +89,16 @@ def env_helper_mock(): yield env_helper + @pytest.fixture(autouse=True) + def azure_blob_clints_mock(): + with patch( + "backend.batch.utilities.tools.question_answer_tool.AzureBlobStorageClient" + ) as mock: + blob_helper = mock.return_value + blob_helper.get_container_sas.return_value = "mock sas" + + yield blob_helper + class TestSpeechToken: @patch("create_app.requests") @@ -599,16 +609,6 @@ def setup_method(self): ), ] - @pytest.fixture(autouse=True) - def azure_blob_service_mock(): - with patch( - "backend.batch.utilities.tools.question_answer_tool.AzureBlobStorageClient" - ) as mock: - blob_helper = mock.return_value - blob_helper.get_container_sas.return_value = "mock sas" - - yield blob_helper - @patch("create_app.AzureOpenAI") @patch( "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" From bb7cefd6bd78701c70270fff63bc78a7ff80adcb Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Fri, 6 Sep 2024 17:36:53 +0530 Subject: [PATCH 26/28] lint issue fix --- code/tests/test_app.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/code/tests/test_app.py b/code/tests/test_app.py index 30344b4a3..5da74e611 100644 --- a/code/tests/test_app.py +++ b/code/tests/test_app.py @@ -89,16 +89,6 @@ def env_helper_mock(): yield env_helper - @pytest.fixture(autouse=True) - def azure_blob_clints_mock(): - with patch( - "backend.batch.utilities.tools.question_answer_tool.AzureBlobStorageClient" - ) as mock: - blob_helper = mock.return_value - blob_helper.get_container_sas.return_value = "mock sas" - - yield blob_helper - class TestSpeechToken: @patch("create_app.requests") @@ -613,8 +603,10 @@ def setup_method(self): @patch( "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" ) + @patch("backend.batch.utilities.common.source_document.AzureBlobStorageClient") def test_conversation_azure_byod_returns_correct_response_when_streaming_with_data_keys( self, + azure_blob_service_mock, get_active_config_or_default_mock, azure_openai_mock: MagicMock, env_helper_mock: MagicMock, @@ -630,6 +622,7 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( "byod" ) + azure_blob_service_mock().get_container_sas.return_value = "_12345" # when response = client.post( From 5d3e3b32215a99352765b1c43c390222b7bbb61a Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Fri, 6 Sep 2024 17:51:57 +0530 Subject: [PATCH 27/28] lint issue fix --- code/tests/test_app.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/code/tests/test_app.py b/code/tests/test_app.py index 5da74e611..bc94bd348 100644 --- a/code/tests/test_app.py +++ b/code/tests/test_app.py @@ -603,10 +603,12 @@ def setup_method(self): @patch( "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" ) - @patch("backend.batch.utilities.common.source_document.AzureBlobStorageClient") + @patch( + "backend.batch.utilities.helpers.azure_blob_storage_client.generate_container_sas" + ) def test_conversation_azure_byod_returns_correct_response_when_streaming_with_data_keys( self, - azure_blob_service_mock, + generate_container_sas_mock: MagicMock, get_active_config_or_default_mock, azure_openai_mock: MagicMock, env_helper_mock: MagicMock, @@ -622,7 +624,7 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( "byod" ) - azure_blob_service_mock().get_container_sas.return_value = "_12345" + generate_container_sas_mock.return_value = "mock-sas" # when response = client.post( From 136e199b8b087f637c9ea670e5dee2fe5e7cedf0 Mon Sep 17 00:00:00 2001 From: "Ajit Padhi (Persistent Systems Inc)" Date: Fri, 6 Sep 2024 17:59:56 +0530 Subject: [PATCH 28/28] lint issue fix --- code/tests/test_app.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/code/tests/test_app.py b/code/tests/test_app.py index bc94bd348..776e48bef 100644 --- a/code/tests/test_app.py +++ b/code/tests/test_app.py @@ -698,8 +698,12 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da @patch( "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" ) + @patch( + "backend.batch.utilities.helpers.azure_blob_storage_client.generate_container_sas" + ) def test_conversation_azure_byod_returns_correct_response_when_streaming_with_data_rbac( self, + generate_container_sas_mock: MagicMock, get_active_config_or_default_mock, azure_openai_mock: MagicMock, env_helper_mock: MagicMock, @@ -711,6 +715,7 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( "byod" ) + generate_container_sas_mock.return_value = "mock-sas" openai_client_mock = azure_openai_mock.return_value openai_client_mock.chat.completions.create.return_value = ( self.mock_streamed_response @@ -754,8 +759,12 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da @patch( "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" ) + @patch( + "backend.batch.utilities.helpers.azure_blob_storage_client.generate_container_sas" + ) def test_conversation_azure_byod_returns_correct_response_when_not_streaming_with_data( self, + generate_container_sas_mock: MagicMock, get_active_config_or_default_mock, azure_openai_mock: MagicMock, env_helper_mock: MagicMock, @@ -767,6 +776,7 @@ def test_conversation_azure_byod_returns_correct_response_when_not_streaming_wit get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( "byod" ) + generate_container_sas_mock.return_value = "mock-sas" openai_client_mock = azure_openai_mock.return_value openai_client_mock.chat.completions.create.return_value = self.mock_response @@ -1114,8 +1124,12 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_without @patch( "backend.batch.utilities.helpers.config.config_helper.ConfigHelper.get_active_config_or_default" ) + @patch( + "backend.batch.utilities.helpers.azure_blob_storage_client.generate_container_sas" + ) def test_conversation_azure_byod_uses_semantic_config( self, + generate_container_sas_mock: MagicMock, get_active_config_or_default_mock, azure_openai_mock: MagicMock, client: FlaskClient, @@ -1125,6 +1139,7 @@ def test_conversation_azure_byod_uses_semantic_config( get_active_config_or_default_mock.return_value.prompts.conversational_flow = ( "byod" ) + generate_container_sas_mock.return_value = "mock-sas" openai_client_mock = azure_openai_mock.return_value openai_client_mock.chat.completions.create.return_value = ( self.mock_streamed_response