Skip to content

Commit

Permalink
Moved LLM/Embeddings Model into LLMHelper to further decouple code
Browse files: browse the repository at this point in the history
  • Loading branch information
csiebler committed Jul 5, 2023
1 parent 5b97cd4 commit 0f02965
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 40 deletions.
30 changes: 5 additions & 25 deletions backend/utilities/DocumentProcessor.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,21 @@
from .formrecognizer import AzureFormRecognizerClient
from .azureblobstorage import AzureBlobStorageClient

import os
import openai
from dotenv import load_dotenv
import logging
import re
import hashlib
from typing import Optional

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.base import VectorStore
from langchain.document_loaders.base import BaseLoader
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import TokenTextSplitter, TextSplitter
from langchain.document_loaders.base import BaseLoader
from opencensus.ext.azure.log_exporter import AzureLogHandler


from .formrecognizer import AzureFormRecognizerClient
from .azureblobstorage import AzureBlobStorageClient
from .azuresearch import AzureSearch
from .LLMHelper import LLMHelper
from .ConfigHelper import ConfigHelper

import pandas as pd
Expand All @@ -28,10 +25,7 @@


class DocumentProcessor:
def __init__(
self
):

def __init__(self):
self.pdf_parser: AzureFormRecognizerClient = AzureFormRecognizerClient()
self.blob_client: AzureBlobStorageClient = AzureBlobStorageClient()
self.user_agent: UserAgent = UserAgent()
Expand All @@ -48,21 +42,7 @@ def __init__(
self.azure_search_key: str = os.getenv("AZURE_SEARCH_KEY")
self.index_name: str = os.getenv("AZURE_SEARCH_INDEX")

os.environ["OPENAI_API_BASE"] = f"https://{os.getenv('AZURE_OPENAI_RESOURCE')}.openai.azure.com/"
os.environ["OPENAI_API_KEY"] = os.getenv("AZURE_OPENAI_KEY")
os.environ["OPENAI_API_VERSION"] = os.getenv("AZURE_OPENAI_API_VERSION")

openai.api_type = "azure"
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = os.getenv("AZURE_OPENAI_API_VERSION")
openai.api_key = os.getenv("OPENAI_API_KEY")

# Azure OpenAI settings
self.api_base = openai.api_base
self.api_version = openai.api_version

self.model: str = os.getenv("OPENAI_EMBEDDINGS_ENGINE_DOC", "text-embedding-ada-002")
self.embeddings: OpenAIEmbeddings = OpenAIEmbeddings(model=self.model, chunk_size=1)
self.embeddings = LLMHelper().get_embedding_model()

self.vector_store: VectorStore = AzureSearch(
azure_cognitive_search_name=self.azure_search_endpoint,
Expand Down
34 changes: 34 additions & 0 deletions backend/utilities/LLMHelper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os
import openai
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from dotenv import load_dotenv

class LLMHelper:
    """Configure the Azure OpenAI client and build the shared chat/embedding models.

    Constructing an instance:
      * loads a ``.env`` file (if present) via ``load_dotenv()``,
      * derives the ``OPENAI_API_*`` environment variables from the
        ``AZURE_OPENAI_*`` ones,
      * sets the module-level ``openai`` settings (type, version, base, key),
      * creates one ``AzureChatOpenAI`` and one ``OpenAIEmbeddings`` instance.

    Environment variables read:
      AZURE_OPENAI_RESOURCE         resource name used to build the endpoint URL
      AZURE_OPENAI_KEY              API key
      AZURE_OPENAI_API_VERSION      API version string
      AZURE_OPENAI_MODEL            chat deployment name
      AZURE_OPENAI_EMBEDDING_MODEL  embedding model name
      AZURE_OPENAI_MAX_TOKENS       optional integer; unset/empty means no limit given

    Raises:
      ValueError: if a required setting is missing, or if
        AZURE_OPENAI_MAX_TOKENS is set to something that is not an integer.
    """

    # Without these the endpoint URL or the credentials cannot be built.
    _REQUIRED_SETTINGS = (
        "AZURE_OPENAI_RESOURCE",
        "AZURE_OPENAI_KEY",
        "AZURE_OPENAI_API_VERSION",
    )

    def __init__(self):
        # Make the helper self-contained: callers no longer have to remember to
        # load the .env file before instantiating it.
        load_dotenv()

        # Fail early with the names of the missing settings. Otherwise the
        # os.environ assignments below raise "TypeError: str expected, not
        # NoneType", or the endpoint silently becomes "https://None.openai...".
        missing = [name for name in self._REQUIRED_SETTINGS if os.getenv(name) is None]
        if missing:
            raise ValueError(
                "Missing required environment variable(s): " + ", ".join(missing)
            )

        os.environ["OPENAI_API_BASE"] = f"https://{os.getenv('AZURE_OPENAI_RESOURCE')}.openai.azure.com/"
        os.environ["OPENAI_API_KEY"] = os.getenv("AZURE_OPENAI_KEY")
        os.environ["OPENAI_API_VERSION"] = os.getenv("AZURE_OPENAI_API_VERSION")

        # Configure OpenAI API
        openai.api_type = "azure"
        openai.api_version = os.getenv("AZURE_OPENAI_API_VERSION")
        openai.api_base = os.getenv("OPENAI_API_BASE")
        openai.api_key = os.getenv("OPENAI_API_KEY")

        # Environment values are always strings; hand the model a real int, and
        # treat an unset or empty variable as "no explicit limit".
        raw_max_tokens = os.getenv("AZURE_OPENAI_MAX_TOKENS")
        max_tokens = int(raw_max_tokens) if raw_max_tokens else None

        self.llm = AzureChatOpenAI(
            deployment_name=os.getenv("AZURE_OPENAI_MODEL"),
            temperature=0,
            max_tokens=max_tokens,
            openai_api_version=openai.api_version,
        )
        self.embedding_model = OpenAIEmbeddings(
            model=os.getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
            chunk_size=1,
        )

    def get_llm(self):
        """Return the ``AzureChatOpenAI`` instance created in ``__init__``."""
        return self.llm

    def get_embedding_model(self):
        """Return the ``OpenAIEmbeddings`` instance created in ``__init__``."""
        return self.embedding_model








18 changes: 3 additions & 15 deletions backend/utilities/QuestionHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,16 @@
import json
from azuresearch import AzureSearch
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from dotenv import load_dotenv
from langchain.chains.llm import LLMChain
from langchain.chains.chat_vector_db.prompts import CONDENSE_QUESTION_PROMPT
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.callbacks import get_openai_callback
from opencensus.ext.azure.log_exporter import AzureLogHandler

from .azuresearch import AzureSearch
from .ConfigHelper import ConfigHelper
from .LLMHelper import LLMHelper
from .azureblobstorage import AzureBlobStorageClient


Expand All @@ -32,19 +30,9 @@
class QuestionHandler:
def __init__(self):
load_dotenv()

os.environ["OPENAI_API_BASE"] = f"https://{os.getenv('AZURE_OPENAI_RESOURCE')}.openai.azure.com/"
os.environ["OPENAI_API_KEY"] = os.getenv("AZURE_OPENAI_KEY")
os.environ["OPENAI_API_VERSION"] = os.getenv("AZURE_OPENAI_API_VERSION")

# Configure OpenAI API
openai.api_type = "azure"
openai.api_version = os.getenv("AZURE_OPENAI_API_VERSION")
openai.api_base = os.getenv('OPENAI_API_BASE')
openai.api_key = os.getenv("OPENAI_API_KEY")

self.llm = AzureChatOpenAI(deployment_name=os.getenv("AZURE_OPENAI_MODEL"), temperature=0, max_tokens=os.getenv('AZURE_OPENAI_MAX_TOKENS', None), openai_api_version=openai.api_version)
self.embeddings = OpenAIEmbeddings(model=os.getenv("AZURE_OPENAI_EMBEDDING_MODEL"), chunk_size=1)
self.llm = LLMHelper().get_llm()
self.embeddings = LLMHelper().get_embedding_model()

# Connect to search
self.vector_store = AzureSearch(
Expand Down

0 comments on commit 0f02965

Please sign in to comment.