From de3dbf345b3111c971f5993db1376eda6f5243ed Mon Sep 17 00:00:00 2001 From: lvalics Date: Sun, 18 Feb 2024 09:28:02 +0000 Subject: [PATCH] - The conversational retrieval functionality is now operating as expected. It successfully sends the conversation history to the language model, allowing the context from previous interactions to be utilized effectively. - Added support for Ollama as the Language Model (LLM). Ensure Ollama is specified in the .env configuration and the model is preloaded on the server. --- .gitignore | 2 + dj_backend_server/.gitignore | 3 +- dj_backend_server/CHANGELOG.MD | 4 ++ dj_backend_server/api/utils/get_embeddings.py | 18 +++--- dj_backend_server/api/utils/get_openai_llm.py | 62 +++++++------------ dj_backend_server/api/utils/make_chain.py | 1 + dj_backend_server/api/views/views_chat.py | 14 ++--- dj_backend_server/api/views/views_message.py | 32 ++++------ dj_backend_server/example.env | 11 ++-- dj_backend_server/requirements.txt | 7 ++- .../web/services/chat_history_service.py | 39 +++++++----- 11 files changed, 92 insertions(+), 101 deletions(-) diff --git a/.gitignore b/.gitignore index c9ae65c9..2a1061ec 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ dj_backend_server/nginx/nginx.conf dj_backend_server.code-workspace .aider* .aiderignore +dj_backend_server/.vscode/settings.json + diff --git a/dj_backend_server/.gitignore b/dj_backend_server/.gitignore index 6244bad9..c4b9e9a6 100644 --- a/dj_backend_server/.gitignore +++ b/dj_backend_server/.gitignore @@ -37,4 +37,5 @@ pip-delete-this-directory.txt website_data_sources/* venv open-llama-7B-open-instruct.ggmlv3.q4_K_M.bin -llama-2-7b-chat.ggmlv3.q4_K_M.bin \ No newline at end of file +llama-2-7b-chat.ggmlv3.q4_K_M.bin +.vscode/ \ No newline at end of file diff --git a/dj_backend_server/CHANGELOG.MD b/dj_backend_server/CHANGELOG.MD index ee854117..9a720f01 100644 --- a/dj_backend_server/CHANGELOG.MD +++ b/dj_backend_server/CHANGELOG.MD @@ -1,3 +1,7 @@ +2.18.2024 +- The conversational retrieval functionality is now operating as expected. It successfully sends the conversation history to the language model, allowing the context from previous interactions to be utilized effectively. +- Added support for Ollama as the Language Model (LLM). Ensure Ollama is specified in the .env configuration and the model is preloaded on the server. + 2.17.2024 - Incorporate 'Ollama' into your example.env configuration and make sure to reflect these changes in your .env file for compatibility. - We've expanded the logging capabilities within settings.py by deploying logging.debug for more detailed insights, although it remains inactive when the DEBUG mode is off. 
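For readers applying this patch, a minimal sketch (not part of the patch itself) of how the new Ollama path in get_ollama_llm() is exercised once OPENAI_API_TYPE=ollama selects that branch in get_llm(): the OLLAMA_URL and OLLAMA_MODEL_NAME variables come from example.env, while the host, model, and prompt below are placeholder values, assuming the model has already been pulled on the Ollama server (for example "ollama pull llama2").

import os

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.chat_models import ChatOllama

# Placeholder values; in the project these are read from the .env file.
os.environ.setdefault("OLLAMA_URL", "http://localhost:11434")  # no trailing slash
os.environ.setdefault("OLLAMA_MODEL_NAME", "llama2")           # must already be pulled on the server

llm = ChatOllama(
    base_url=os.environ["OLLAMA_URL"],
    model=os.environ["OLLAMA_MODEL_NAME"],
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
)

# Tokens stream to stdout through the callback handler; the full reply is returned.
reply = llm.invoke("Say hello in one short sentence.")
print(reply.content)
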
diff --git a/dj_backend_server/api/utils/get_embeddings.py b/dj_backend_server/api/utils/get_embeddings.py index 61d8b19e..f378ba8c 100644 --- a/dj_backend_server/api/utils/get_embeddings.py +++ b/dj_backend_server/api/utils/get_embeddings.py @@ -18,8 +18,8 @@ def get_azure_embedding(): deployment = os.environ.get("AZURE_OPENAI_EMBEDDING_MODEL_NAME") openai_api_key = os.environ.get("AZURE_OPENAI_API_KEY") client = os.environ.get("AZURE_OPENAI_API_TYPE") - openai_api_base = os.environ['AZURE_OPENAI_API_BASE'] - openai_api_version = os.environ['AZURE_OPENAI_API_VERSION'] + openai_api_base = os.environ["AZURE_OPENAI_API_BASE"] + openai_api_version = os.environ["AZURE_OPENAI_API_VERSION"] return OpenAIEmbeddings( openai_api_key=openai_api_key, @@ -27,14 +27,14 @@ def get_azure_embedding(): client=client, chunk_size=8, openai_api_base=openai_api_base, - openai_api_version=openai_api_version + openai_api_version=openai_api_version, ) def get_openai_embedding(): """Gets embeddings using the OpenAI embedding provider.""" openai_api_key = os.environ.get("OPENAI_API_KEY") - return OpenAIEmbeddings(openai_api_key=openai_api_key, chunk_size=1) + return OpenAIEmbeddings(openai_api_key=openai_api_key, chunk_size=1) def get_llama2_embedding(): @@ -48,15 +48,17 @@ def choose_embedding_provider(): if embedding_provider == EmbeddingProvider.azure.value: return get_azure_embedding() - + elif embedding_provider == EmbeddingProvider.OPENAI.value: return get_openai_embedding() - + elif embedding_provider == EmbeddingProvider.llama2.value: return get_llama2_embedding() else: - available_providers = ", ".join([service.value for service in EmbeddingProvider]) + available_providers = ", ".join( + [service.value for service in EmbeddingProvider] + ) raise ValueError( f"Embedding service '{embedding_provider}' is not currently available. 
" f"Available services: {available_providers}" @@ -66,4 +68,4 @@ def choose_embedding_provider(): # Main function to get embeddings def get_embeddings() -> Embeddings: """Gets embeddings using the chosen embedding provider.""" - return choose_embedding_provider() \ No newline at end of file + return choose_embedding_provider() diff --git a/dj_backend_server/api/utils/get_openai_llm.py b/dj_backend_server/api/utils/get_openai_llm.py index 87cabca3..95ce348e 100644 --- a/dj_backend_server/api/utils/get_openai_llm.py +++ b/dj_backend_server/api/utils/get_openai_llm.py @@ -5,15 +5,11 @@ from django.utils.timezone import make_aware from datetime import datetime, timezone from uuid import uuid4 -from ollama import Client -from openai import OpenAI from django.conf import settings from langchain_openai.chat_models import ChatOpenAI -from langchain_community.llms import Ollama +from langchain_community.chat_models import ChatOllama from langchain_community.llms import AzureOpenAI from langchain_community.llms import LlamaCpp -from langchain.prompts import PromptTemplate -from langchain.chains import LLMChain from langchain.callbacks.manager import CallbackManager from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from web.models.failed_jobs import FailedJob @@ -62,12 +58,7 @@ def get_llama_llm(): def get_azure_openai_llm(): """Returns AzureOpenAI instance configured from environment variables""" try: - if settings.DEBUG: - openai_api_type = "openai" # JUST FOR DEVELOPMENT - logging.debug(f"DEVELOPMENT Using API Type: {openai_api_type}") - else: - openai_api_type = os.environ["AZURE_OPENAI_API_TYPE"] - + openai_api_type = os.environ["AZURE_OPENAI_API_TYPE"] openai_api_key = os.environ["AZURE_OPENAI_API_KEY"] openai_deployment_name = os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"] openai_model_name = os.environ["AZURE_OPENAI_COMPLETION_MODEL"] @@ -134,22 +125,18 @@ def get_openai_llm(): traceback.print_exc() -def get_ollama_llm(sanitized_question): - """Returns an Ollama Server instance configured from environment variables""" - llm = Client(host=os.environ.get("OLLAMA_URL")) - # Use the client to make a request +def get_ollama_llm(): + """Returns an Ollama instance configured from environment variables""" try: - if sanitized_question: - response = llm.chat( - model=os.environ.get("OLLAMA_MODEL_NAME"), - messages=[{"role": "user", "content": sanitized_question}], - ) - else: - raise ValueError("Question cannot be None.") - if response: - return response - else: - raise ValueError("Invalid response from Ollama.") + base_url = os.environ.get("OLLAMA_URL") + model = os.environ.get("OLLAMA_MODEL_NAME", "llama2") + + llm = ChatOllama( + base_url=base_url, + model=model, + callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), + ) + return llm except Exception as e: logger.debug(f"Exception in get_ollama_llm: {e}") @@ -157,7 +144,7 @@ def get_ollama_llm(sanitized_question): uuid=str(uuid4()), connection="default", queue="default", - payload="get_openai_llm", + payload="get_ollama_llm", exception=str(e), failed_at=make_aware(datetime.now(), timezone.utc), ) @@ -176,29 +163,26 @@ def get_llm(): "ollama": lambda: get_ollama_llm(), } + # DEVENV + # if settings.DEBUG: + # api_type = "ollama" api_type = os.environ.get("OPENAI_API_TYPE", "openai") + if api_type not in clients: raise ValueError(f"Invalid OPENAI_API_TYPE: {api_type}") logging.debug(f"Using LLM: {api_type}") if api_type in clients: - if api_type == "ollama": - return clients[api_type]() - elif api_type != 
"ollama": - return clients[api_type]() + llm_instance = clients[api_type]() + if llm_instance is None: + logger.error(f"LLM instance for {api_type} could not be created.") + return None + return llm_instance else: raise ValueError(f"Invalid OPENAI_API_TYPE: {api_type}") except Exception as e: - failed_job = FailedJob( - uuid=str(uuid4()), - connection="default", - queue="default", - payload="get_llm", - exception=str(e), - failed_at=datetime.now(), - ) failed_job = FailedJob( uuid=str(uuid4()), connection="default", diff --git a/dj_backend_server/api/utils/make_chain.py b/dj_backend_server/api/utils/make_chain.py index a3fdc870..f8e0a71d 100644 --- a/dj_backend_server/api/utils/make_chain.py +++ b/dj_backend_server/api/utils/make_chain.py @@ -98,6 +98,7 @@ def getConversationRetrievalChain( retriever=vector_store.as_retriever(), verbose=True, combine_docs_chain_kwargs={"prompt": prompt}, + return_source_documents=True, ) logger.debug(f"ConversationalRetrievalChain {llm}, created: {chain}") return chain diff --git a/dj_backend_server/api/views/views_chat.py b/dj_backend_server/api/views/views_chat.py index 166354c7..00a87214 100644 --- a/dj_backend_server/api/views/views_chat.py +++ b/dj_backend_server/api/views/views_chat.py @@ -165,21 +165,15 @@ def get_completion_response( elif chain_type == "conversation_retrieval": chain = getConversationRetrievalChain(vector_store, mode, initial_prompt) logger.debug("getConversationRetrievalChain") - chat_history_json = json.dumps( - get_chat_history_for_retrieval_chain( - session_id, limit=20, initial_prompt=initial_prompt - ), - ensure_ascii=False, + chat_history = get_chat_history_for_retrieval_chain( + session_id, limit=20, initial_prompt=initial_prompt ) - chat_history_json = "" - logger.debug(f"Formatted Chat_history {chat_history_json}") + logger.debug(f"Formatted Chat_history {chat_history}") response = chain.invoke( - {"question": sanitized_question, "chat_history": chat_history_json} + {"question": sanitized_question, "chat_history": chat_history}, ) - logger.debug(f"response from chain.invoke: {response}") response_text = response.get("answer") - logger.debug(f"response_text : {response_text}") try: # Attempt to parse the response_text as JSON response_text = json.loads(response_text) diff --git a/dj_backend_server/api/views/views_message.py b/dj_backend_server/api/views/views_message.py index 6bbce3bb..18ac6faf 100644 --- a/dj_backend_server/api/views/views_message.py +++ b/dj_backend_server/api/views/views_message.py @@ -170,26 +170,23 @@ def send_chat(request): """ try: - if settings.DEBUG: - logger.debug("Entering send_chat function") + logger.debug("Entering send_chat function") # You can add additional validation for 'history' and 'content_type' if needed. bot_token = request.headers.get("X-Bot-Token") bot = get_object_or_404(Chatbot, token=bot_token) data = json.loads(request.body) - if settings.DEBUG: - logger.debug( - f"Request data: {data}" - ) # {'from': 'user', 'type': 'text', 'content': 'input text from chat'} + logger.debug( + f"Request data: {data}" + ) # {'from': 'user', 'type': 'text', 'content': 'input text from chat'} # Validate the request data content = data.get("content") history = data.get("history") - if settings.DEBUG: - logger.debug(f"Content: {content}") - logger.debug( - f"History: {history}" - ) # history is a list of chat history - None???? + logger.debug(f"Content: {content}") + logger.debug( + f"History: {history}" + ) # history is a list of chat history - None???? 
content_type = data.get("type") session_id = get_session_id(request=request, bot_id=bot.id) @@ -198,10 +195,9 @@ def send_chat(request): {"message": entry.message, "from_user": entry.from_user} for entry in history ] - if settings.DEBUG: - logger.debug( - f"History entries in JSON: {history_entries} - and history in text from DB: {history}" - ) + logger.debug( + f"History entries in JSON: {history_entries} - and history in text from DB: {history}" + ) # Implement the equivalent logic for validation if not content: @@ -211,8 +207,7 @@ def send_chat(request): ) # Implement the equivalent logic to send the HTTP request to the external API - if settings.DEBUG: - logger.debug(f"External API response START") + logger.debug(f"External API response START") response = requests.post( os.getenv("APP_URL") + "/api/chat/", json={ @@ -226,8 +221,7 @@ def send_chat(request): }, timeout=200, ) - if settings.DEBUG: - logger.debug(f"External API response: {response.text} and {response}") + logger.debug(f"External API response: {response.text} and {response}") """ This block will first check if the response content is not empty. If it is empty, diff --git a/dj_backend_server/example.env b/dj_backend_server/example.env index 4fee96cd..5e9dcec7 100644 --- a/dj_backend_server/example.env +++ b/dj_backend_server/example.env @@ -19,7 +19,7 @@ OPENAI_API_TYPE=openai OPENAI_API_MODEL=gpt-4-1106-preview OPENAI_API_TEMPERATURE=1 -# azure | openai | llama2 | ollama +# azure | openai | llama2 - change only if you know what you do EMBEDDING_PROVIDER=openai # If using azure @@ -30,22 +30,20 @@ EMBEDDING_PROVIDER=openai # AZURE_OPENAI_DEPLOYMENT_NAME= # AZURE_OPENAI_COMPLETION_MODEL=gpt-35-turbo - +# OLLAMA_URL="" #no trailing slash at the end or will not work. +# OLLAMA_MODEL_NAME="" # ex openchat, llama2 - Be sure you have this on server downloaded "ollama pull openchat" # Vector Store, PINECONE|QDRANT STORE=QDRANT - # if using pinecone # PINECONE_API_KEY= # PINECONE_ENV= # VECTOR_STORE_INDEX_NAME= - # if using qdrant QDRANT_URL=http://qdrant:6333 - # optional, defaults to 15 MAX_PAGES_CRAWL=150 @@ -73,5 +71,4 @@ OCR_LLM = '1' # retrieval_qa | conversation_retrieval, retrieval_qa works better with azure openai # if you want to use the conversation_retrieval | retrieval_qa chain -CHAIN_TYPE=conversation_retrieval - +CHAIN_TYPE=conversation_retrieval \ No newline at end of file diff --git a/dj_backend_server/requirements.txt b/dj_backend_server/requirements.txt index d772269a..e91d0037 100644 --- a/dj_backend_server/requirements.txt +++ b/dj_backend_server/requirements.txt @@ -25,6 +25,9 @@ drf-spectacular==0.27.1 drf_spectacular.extensions==0.0.2 exceptiongroup==1.1.2 frozenlist==1.4.0 +filelock==3.13.1 +fsspec==2024.2.0 +huggingface-hub==0.20.3 grpcio==1.56.2 grpcio-tools==1.56.2 h11==0.14.0 @@ -71,6 +74,7 @@ qdrant-client==1.7.0 redis==4.6.0 regex==2023.6.3 requests==2.31.0 +safetensors==0.4.2 six==1.16.0 sniffio==1.3.0 soupsieve==2.4.1 @@ -79,6 +83,8 @@ sqlparse==0.4.4 tenacity==8.2.2 tiktoken==0.6.0 tqdm==4.65.0 +tokenizers==0.15.2 +transformers==4.37.2 typing-inspect==0.9.0 typing_extensions==4.7.1 tzdata==2023.3 @@ -88,4 +94,3 @@ wcwidth==0.2.6 yarl==1.9.2 django-cors-headers==4.3.1 - diff --git a/dj_backend_server/web/services/chat_history_service.py b/dj_backend_server/web/services/chat_history_service.py index 8a66f940..6e1e011c 100644 --- a/dj_backend_server/web/services/chat_history_service.py +++ b/dj_backend_server/web/services/chat_history_service.py @@ -1,13 +1,18 @@ from typing import List, Optional, 
Tuple, NamedTuple from web.models.chat_histories import ChatHistory from langchain.schema import BaseMessage, AIMessage, HumanMessage +from langchain.memory import ConversationSummaryBufferMemory from django.conf import settings +from api.utils.get_openai_llm import get_llm import logging logging.config.dictConfig(settings.LOGGING) logger = logging.getLogger(__name__) +from langchain.schema import HumanMessage, AIMessage + + def get_chat_history_for_retrieval_chain( session_id: str, limit: Optional[int] = None, initial_prompt: Optional[str] = None ) -> List[dict]: @@ -25,28 +30,30 @@ def get_chat_history_for_retrieval_chain( query = ChatHistory.objects.filter(session_id=session_id).order_by("created_at") if limit: query = query[:limit] - for entry in query: - role = "user" if entry.from_user == "True" else "assistant" - logger.debug(f"Chat history entry: {entry}, role: {role}") - - chat_history = [ - { - "role": "system", - "content": "This is an initial system message setting up the context.", - } - ] - user_query = None - # Directly interpret the from_user flag to assign roles correctly + chat_history = [] + user_query = None + llm = get_llm() + # Now, chat_history is properly defined and can be used to initialize the memory + memory = ConversationSummaryBufferMemory( + llm=llm, + max_token_limit=1024, + memory_key=session_id, + return_messages=True, + ) + # Assuming chat_history is meant to be a list of messages for the memory + # Here you should convert your query results into the desired format for chat_history + # For example, appending dicts to chat_history list as shown below might be what you intended for entry in query: if entry.from_user == "True": - # This entry is a user query; store it to pair with the next bot response + chat_history.append(HumanMessage(content=entry.message)) user_query = entry.message else: - # This entry is a bot response; pair it with the last user query if available + chat_history.append(AIMessage(content=entry.message)) if user_query is not None: - chat_history.append({"role": "user", "content": user_query}) - chat_history.append({"role": "assistant", "content": entry.message}) + memory.save_context({"input": user_query}, {"output": entry.message}) user_query = None + logger.debug(f"Memory PRINT: {memory}") + # chat_history = memory.load_memory_variables({}) return chat_history
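To make the conversational-retrieval data flow above easier to follow, here is a small illustrative sketch (not part of the patch): it mirrors the conversion that get_chat_history_for_retrieval_chain() now performs and shows how views_chat.py hands the result to the chain. The sample rows are hypothetical stand-ins for ChatHistory records, and the chain call is left commented out because it requires a configured vector store.

from langchain.schema import AIMessage, HumanMessage

# Hypothetical rows standing in for ChatHistory.objects.filter(session_id=...)
rows = [
    {"from_user": "True", "message": "What plans do you offer?"},
    {"from_user": "False", "message": "We offer a free plan and a pro plan."},
]

# Same conversion the service performs: user rows become HumanMessage,
# bot rows become AIMessage, in chronological order.
chat_history = [
    HumanMessage(content=row["message"])
    if row["from_user"] == "True"
    else AIMessage(content=row["message"])
    for row in rows
]

# views_chat.py then passes this list directly to the chain:
# response = chain.invoke(
#     {"question": sanitized_question, "chat_history": chat_history}
# )
# response_text = response.get("answer")
# sources = response.get("source_documents")  # available because make_chain.py
#                                             # now sets return_source_documents=True

print(chat_history)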