From 8372f20d53526bc129a13b39d64610389876ed4c Mon Sep 17 00:00:00 2001 From: Clemens Siebler Date: Thu, 6 Jul 2023 12:11:26 +0200 Subject: [PATCH] Updated the main answering prompt to be more robust in providing sources, and fixed frontend issue when chunk was the first one --- backend/utilities/ConfigHelper.py | 17 +++++++++++------ backend/utilities/QuestionHandler.py | 2 +- frontend/src/components/Answer/Answer.tsx | 2 +- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/backend/utilities/ConfigHelper.py b/backend/utilities/ConfigHelper.py index e65ab2f0d..fe0341e9f 100644 --- a/backend/utilities/ConfigHelper.py +++ b/backend/utilities/ConfigHelper.py @@ -66,14 +66,19 @@ def get_default_config(): {chat_history} Follow Up Input: {question} Standalone question:""", - "answering_prompt": """{summaries} + "answering_prompt": """Context: +{summaries} + +Please reply to the question using only the information Context section above. If you can't answer a question using the context, reply politely that the information is not in the knowledge base. DO NOT make up your own answers. You detect the language of the question and answer in the same language. If asked for enumerations list all of them and do not invent any. + +The context is structured like this: -Please reply to the question using only the information present in the text above. -If you can't find it, reply politely that the information is not in the knowledge base. -Detect the language of the question and answer in the same language. -If asked for enumerations list all of them and do not invent any. +Content: +Source: [url/to/file.pdf](url/to/file.pdf_SAS_TOKEN_PLACEHOLDER_) + -Each source has a name followed by a colon and the actual information, always include the source name for each fact you use in the response. Always use double square brackets to reference the filename source, e.g. [[info1.pdf.txt]]. Don't combine sources, list each source separately, e.g. [[info1.pdf]][[info2.txt]]. +When you give your answer, you ALWAYS MUST include the source in your response in the following format: [[file.pdf]] +Always use double square brackets to reference the filename source, e.g. [[file.pdf]]. When using multiple sources, list each source separately, e.g. [[file1.pdf]][[file2.pdf]]. Question: {question} Answer:""", diff --git a/backend/utilities/QuestionHandler.py b/backend/utilities/QuestionHandler.py index 211c4f579..cf1119633 100644 --- a/backend/utilities/QuestionHandler.py +++ b/backend/utilities/QuestionHandler.py @@ -130,7 +130,7 @@ def get_answer_using_langchain(self, question, chat_history): break doc = result["source_documents"][idx] - # Then update the citation object in the response + # Then update the citation object in the response, it needs to have filepath and chunk_id to render in the UI as a file messages[0]["content"]["citations"].append( { "content": doc.page_content, diff --git a/frontend/src/components/Answer/Answer.tsx b/frontend/src/components/Answer/Answer.tsx index abc2f8cd8..784ef6061 100644 --- a/frontend/src/components/Answer/Answer.tsx +++ b/frontend/src/components/Answer/Answer.tsx @@ -38,7 +38,7 @@ export const Answer = ({ const createCitationFilepath = (citation: Citation, index: number, truncate: boolean = false) => { let citationFilename = ""; - if (citation.filepath && citation.chunk_id) { + if (citation.filepath && citation.chunk_id != null) { if (truncate && citation.filepath.length > filePathTruncationLimit) { const citationLength = citation.filepath.length; citationFilename = `${citation.filepath.substring(0, 20)}...${citation.filepath.substring(citationLength -20)} - Part ${parseInt(citation.chunk_id) + 1}`;