Skip to content

Commit

Permalink
fix some format issues
Browse files (browse the repository at this point in the history)
  • Loading branch information
zedy committed Feb 7, 2024
1 parent 7ac0727 commit 7ac4e5f
Show file tree
Hide file tree
Showing 12 changed files with 197 additions and 85 deletions.
10 changes: 8 additions & 2 deletions code/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def stream_without_data(response):

def conversation_without_data(request):
azure_endpoint = f"https://{AZURE_OPENAI_RESOURCE}.openai.azure.com/"
if AZURE_AUTH_TYPE == 'rbac':
if AZURE_AUTH_TYPE == "rbac":
openai_client = AzureOpenAI(azure_endpoint=azure_endpoint, api_version=AZURE_OPENAI_API_VERSION, azure_ad_token_provider=AZURE_TOKEN_PROVIDER)
else:
openai_client = AzureOpenAI(azure_endpoint=azure_endpoint, api_version=AZURE_OPENAI_API_VERSION, api_key=AZURE_OPENAI_KEY)
Expand Down Expand Up @@ -323,6 +323,7 @@ def conversation_azure_byod():
@app.route("/api/conversation/custom", methods=["GET", "POST"])
def conversation_custom():
from backend.batch.utilities.helpers.OrchestratorHelper import Orchestrator, OrchestrationSettings

message_orchestrator = Orchestrator()

try:
Expand All @@ -337,7 +338,12 @@ def conversation_custom():
chat_history = []
for i, k in enumerate(user_assistant_messages):
if i % 2 == 0:
chat_history.append((user_assistant_messages[i]["content"],user_assistant_messages[i+1]["content"]))
chat_history.append(
(
user_assistant_messages[i]["content"],
user_assistant_messages[i + 1]["content"],
)
)
from backend.batch.utilities.helpers.ConfigHelper import ConfigHelper

messages = message_orchestrator.handle_message(
Expand Down
70 changes: 60 additions & 10 deletions code/backend/batch/utilities/helpers/AzureBlobStorageHelper.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from typing import Optional
from datetime import datetime, timedelta
from azure.storage.blob import BlobServiceClient, generate_blob_sas, generate_container_sas, ContentSettings, UserDelegationKey
from azure.storage.blob import (
BlobServiceClient,
generate_blob_sas,
generate_container_sas,
ContentSettings,
UserDelegationKey,
)
from .EnvHelper import EnvHelper
from azure.identity import DefaultAzureCredential
import os


class AzureBlobStorageClient:
Expand All @@ -16,7 +21,7 @@ def __init__(
env_helper: EnvHelper = EnvHelper()

self.auth_type = env_helper.AZURE_AUTH_TYPE
if self.auth_type == 'rbac':
if self.auth_type == "rbac":
self.account_name = account_name if account_name else env_helper.AZURE_BLOB_ACCOUNT_NAME
self.container_name : str = container_name if container_name else env_helper.AZURE_BLOB_CONTAINER_NAME
credential = DefaultAzureCredential()
Expand Down Expand Up @@ -49,9 +54,25 @@ def upload_file(self, bytes_data, file_name, content_type='application/pdf'):
container=self.container_name, blob=file_name
)
# Upload the created file
blob_client.upload_blob(bytes_data, overwrite=True, content_settings=ContentSettings(content_type=content_type))
blob_client.upload_blob(
bytes_data,
overwrite=True,
content_settings=ContentSettings(content_type=content_type),
)
# Generate a SAS URL to the blob and return it. If auth_type is rbac, account_key is None; otherwise, user_delegation_key is None.
return blob_client.url + '?' + generate_blob_sas(self.account_name, self.container_name, file_name, user_delegation_key=self.user_delegation_key, account_key=self.account_key, permission="r", expiry=datetime.utcnow() + timedelta(hours=3))
return (
blob_client.url
+ "?"
+ generate_blob_sas(
self.account_name,
self.container_name,
file_name,
user_delegation_key=self.user_delegation_key,
account_key=self.account_key,
permission="r",
expiry=datetime.utcnow() + timedelta(hours=3),
)
)

def download_file(self, file_name):
blob_client = self.blob_service_client.get_blob_client(
Expand Down Expand Up @@ -81,7 +102,14 @@ def get_all_files(self):
)
blob_list = container_client.list_blobs(include="metadata")
# sas = generate_blob_sas(account_name, container_name, blob.name,account_key=account_key, permission="r", expiry=datetime.utcnow() + timedelta(hours=3))
sas = generate_container_sas(self.account_name, self.container_name, user_delegation_key=self.user_delegation_key, account_key=self.account_key, permission="r", expiry=datetime.utcnow() + timedelta(hours=3))
sas = generate_container_sas(
self.account_name,
self.container_name,
user_delegation_key=self.user_delegation_key,
account_key=self.account_key,
permission="r",
expiry=datetime.utcnow() + timedelta(hours=3),
)
files = []
converted_files = {}
for blob in blob_list:
Expand Down Expand Up @@ -121,10 +149,12 @@ def get_all_files(self):
return files

def upsert_blob_metadata(self, file_name, metadata):
if self.auth_type == 'rbac':
if self.auth_type == "rbac":
blob_client = self.blob_service_client.get_blob_client(container=self.container_name, blob=file_name)
else:
blob_client = BlobServiceClient.from_connection_string(self.connect_str).get_blob_client(container=self.container_name, blob=file_name)
blob_client = BlobServiceClient.from_connection_string(
self.connect_str
).get_blob_client(container=self.container_name, blob=file_name)
# Read metadata from the blob
blob_metadata = blob_client.get_blob_properties().metadata
# Update metadata
Expand All @@ -134,7 +164,27 @@ def upsert_blob_metadata(self, file_name, metadata):

def get_container_sas(self):
# Generate a SAS URL to the container and return it
return "?" + generate_container_sas(self.account_name, self.container_name, user_delegation_key=self.user_delegation_key, account_key=self.account_key, permission="r", expiry=datetime.utcnow() + timedelta(hours=3))
return "?" + generate_container_sas(
account_name=self.account_name,
container_name=self.container_name,
user_delegation_key=self.user_delegation_key,
account_key=self.account_key,
permission="r",
expiry=datetime.utcnow() + timedelta(hours=1),
)

def get_blob_sas(self, file_name):
# Generate a SAS URL to the blob and return it
return f"https://{self.account_name}.blob.core.windows.net/{self.container_name}/{file_name}" + "?" + generate_blob_sas(self.account_name, self.container_name, file_name, user_delegation_key=self.user_delegation_key, account_key=self.account_key, permission="r", expiry=datetime.utcnow() + timedelta(hours=3))
return (
f"https://{self.account_name}.blob.core.windows.net/{self.container_name}/{file_name}"
+ "?"
+ generate_blob_sas(
account_name=self.account_name,
container_name=self.container_name,
blob_name=file_name,
user_delegation_key=self.user_delegation_key,
account_key=self.account_key,
permission="r",
expiry=datetime.utcnow() + timedelta(hours=1),
)
)
21 changes: 15 additions & 6 deletions code/backend/batch/utilities/helpers/AzureFormRecognizerHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,32 @@
import html
import traceback
from .EnvHelper import EnvHelper
import os


class AzureFormRecognizerClient:
def __init__(self) -> None:
env_helper : EnvHelper = EnvHelper()
env_helper: EnvHelper = EnvHelper()

self.AZURE_FORM_RECOGNIZER_ENDPOINT : str = env_helper.AZURE_FORM_RECOGNIZER_ENDPOINT
if env_helper.AZURE_AUTH_TYPE == 'rbac':
self.AZURE_FORM_RECOGNIZER_ENDPOINT: str = (
env_helper.AZURE_FORM_RECOGNIZER_ENDPOINT
)
if env_helper.AZURE_AUTH_TYPE == "rbac":
self.document_analysis_client = DocumentAnalysisClient(
endpoint=self.AZURE_FORM_RECOGNIZER_ENDPOINT, credential=DefaultAzureCredential(), headers={"x-ms-useragent": "chat-with-your-data-solution-accelerator/1.0.0"}
endpoint=self.AZURE_FORM_RECOGNIZER_ENDPOINT,
credential=DefaultAzureCredential(),
headers={
"x-ms-useragent": "chat-with-your-data-solution-accelerator/1.0.0"
},
)
else:
self.AZURE_FORM_RECOGNIZER_KEY : str = env_helper.AZURE_FORM_RECOGNIZER_KEY

self.document_analysis_client = DocumentAnalysisClient(
endpoint=self.AZURE_FORM_RECOGNIZER_ENDPOINT, credential=AzureKeyCredential(self.AZURE_FORM_RECOGNIZER_KEY), headers={"x-ms-useragent": "chat-with-your-data-solution-accelerator/1.0.0"}
endpoint=self.AZURE_FORM_RECOGNIZER_ENDPOINT,
credential=AzureKeyCredential(self.AZURE_FORM_RECOGNIZER_KEY),
headers={
"x-ms-useragent": "chat-with-your-data-solution-accelerator/1.0.0"
},
)

form_recognizer_role_to_html = {
Expand Down
52 changes: 26 additions & 26 deletions code/backend/batch/utilities/helpers/EnvHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, **kwargs) -> None:
self.AZURE_SEARCH_FIELDS_TAG = os.getenv('AZURE_SEARCH_FIELDS_TAG', 'tag')
self.AZURE_SEARCH_FIELDS_METADATA = os.getenv('AZURE_SEARCH_FIELDS_METADATA', 'metadata')
self.AZURE_SEARCH_CONVERSATIONS_LOG_INDEX = os.getenv('AZURE_SEARCH_CONVERSATIONS_LOG_INDEX', 'conversations')
self.AZURE_AUTH_TYPE = os.environ.get("AZURE_AUTH_TYPE", "keys")
self.AZURE_AUTH_TYPE = os.getenv('AZURE_AUTH_TYPE', 'keys')
# Azure OpenAI
self.AZURE_OPENAI_RESOURCE = os.getenv('AZURE_OPENAI_RESOURCE', '')
self.AZURE_OPENAI_MODEL = os.getenv('AZURE_OPENAI_MODEL', '')
Expand All @@ -42,64 +42,64 @@ def __init__(self, **kwargs) -> None:
self.AZURE_OPENAI_API_VERSION = os.getenv('AZURE_OPENAI_API_VERSION', '')
self.AZURE_OPENAI_STREAM = os.getenv('AZURE_OPENAI_STREAM', '')
self.AZURE_OPENAI_EMBEDDING_MODEL = os.getenv('AZURE_OPENAI_EMBEDDING_MODEL', '')
self.AZURE_TOKEN_PROVIDER = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default")
self.AZURE_TOKEN_PROVIDER = get_bearer_token_provider(DefaultAzureCredential(), 'https://cognitiveservices.azure.com/.default')
# Initialize Azure keys based on authentication type and environment settings.
# When AZURE_AUTH_TYPE is "rbac", azure keys are None or an empty string.
# When USE_KEY_VAULT environment variable is set, keys are securely fetched from Azure Key Vault using DefaultAzureCredential.
# Otherwise, keys are obtained from environment variables.
if self.AZURE_AUTH_TYPE == "rbac":
if self.AZURE_AUTH_TYPE == 'rbac':
self.AZURE_SEARCH_KEY = None
self.AZURE_OPENAI_KEY = ""
self.AZURE_OPENAI_KEY = ''
self.AZURE_SPEECH_KEY = None
elif os.environ.get("USE_KEY_VAULT"):
elif os.environ.get('USE_KEY_VAULT'):
credential = DefaultAzureCredential()
secret_client = SecretClient(os.environ.get("AZURE_KEY_VAULT_ENDPOINT"), credential)
self.AZURE_SEARCH_KEY = secret_client.get_secret(os.environ.get("AZURE_SEARCH_KEY")).value
self.AZURE_OPENAI_KEY = secret_client.get_secret(os.environ.get("AZURE_OPENAI_KEY")).value
self.AZURE_SPEECH_KEY = secret_client.get_secret(os.environ.get("AZURE_SPEECH_SERVICE_KEY")).value
secret_client = SecretClient(os.environ.get('AZURE_KEY_VAULT_ENDPOINT'), credential)
self.AZURE_SEARCH_KEY = secret_client.get_secret(os.environ.get('AZURE_SEARCH_KEY')).value
self.AZURE_OPENAI_KEY = secret_client.get_secret(os.environ.get('AZURE_OPENAI_KEY')).value
self.AZURE_SPEECH_KEY = secret_client.get_secret(os.environ.get('AZURE_SPEECH_SERVICE_KEY')).value
else:
self.AZURE_SEARCH_KEY = os.environ.get("AZURE_SEARCH_KEY")
self.AZURE_OPENAI_KEY = os.environ.get("AZURE_OPENAI_KEY")
self.AZURE_SPEECH_KEY = os.environ.get("AZURE_SPEECH_SERVICE_KEY")
self.AZURE_SEARCH_KEY = os.environ.get('AZURE_SEARCH_KEY')
self.AZURE_OPENAI_KEY = os.environ.get('AZURE_OPENAI_KEY')
self.AZURE_SPEECH_KEY = os.environ.get('AZURE_SPEECH_SERVICE_KEY')
# Set env for OpenAI SDK
self.OPENAI_API_BASE = f"https://{os.getenv('AZURE_OPENAI_RESOURCE')}.openai.azure.com/"
self.OPENAI_API_TYPE = "azure" if self.AZURE_AUTH_TYPE == "keys" else "azure_ad"
self.OPENAI_API_TYPE = 'azure' if self.AZURE_AUTH_TYPE == 'keys' else 'azure_ad'
self.OPENAI_API_KEY = self.AZURE_OPENAI_KEY
self.OPENAI_API_VERSION = self.AZURE_OPENAI_API_VERSION
os.environ["OPENAI_API_TYPE"] = self.OPENAI_API_TYPE
os.environ["OPENAI_API_BASE"] = f"https://{os.getenv('AZURE_OPENAI_RESOURCE')}.openai.azure.com/"
os.environ["OPENAI_API_KEY"] = self.OPENAI_API_KEY
os.environ["OPENAI_API_VERSION"] = self.OPENAI_API_VERSION
os.environ['OPENAI_API_TYPE'] = self.OPENAI_API_TYPE
os.environ['OPENAI_API_BASE'] = f"https://{os.getenv('AZURE_OPENAI_RESOURCE')}.openai.azure.com/"
os.environ['OPENAI_API_KEY'] = self.OPENAI_API_KEY
os.environ['OPENAI_API_VERSION'] = self.OPENAI_API_VERSION
# Azure Functions - Batch processing
self.BACKEND_URL = os.getenv("BACKEND_URL", "")
self.AzureWebJobsStorage = os.getenv("AzureWebJobsStorage", "")
self.BACKEND_URL = os.getenv('BACKEND_URL', '')
self.AzureWebJobsStorage = os.getenv('AzureWebJobsStorage', '')
self.DOCUMENT_PROCESSING_QUEUE_NAME = os.getenv(
"DOCUMENT_PROCESSING_QUEUE_NAME", ""
'DOCUMENT_PROCESSING_QUEUE_NAME', ''
)
# Azure Blob Storage
self.AZURE_BLOB_ACCOUNT_NAME = os.getenv('AZURE_BLOB_ACCOUNT_NAME', '')
self.AZURE_BLOB_ACCOUNT_KEY = self.secret_client.get_secret(os.getenv("AZURE_BLOB_ACCOUNT_KEY", '')).value if os.getenv("USE_KEY_VAULT", '') else os.getenv("AZURE_BLOB_ACCOUNT_KEY", '')
self.AZURE_BLOB_ACCOUNT_KEY = self.secret_client.get_secret(os.getenv('AZURE_BLOB_ACCOUNT_KEY', '')).value if os.getenv('USE_KEY_VAULT', '') else os.getenv('AZURE_BLOB_ACCOUNT_KEY', '')
self.AZURE_BLOB_CONTAINER_NAME = os.getenv('AZURE_BLOB_CONTAINER_NAME', '')
# Azure Form Recognizer
self.AZURE_FORM_RECOGNIZER_ENDPOINT = os.getenv('AZURE_FORM_RECOGNIZER_ENDPOINT', '')
self.AZURE_FORM_RECOGNIZER_KEY = self.secret_client.get_secret(os.getenv("AZURE_FORM_RECOGNIZER_KEY", '')).value if os.getenv("USE_KEY_VAULT", '') else os.getenv('AZURE_FORM_RECOGNIZER_KEY', '')
self.AZURE_FORM_RECOGNIZER_KEY = self.secret_client.get_secret(os.getenv('AZURE_FORM_RECOGNIZER_KEY', '')).value if os.getenv('USE_KEY_VAULT', '') else os.getenv('AZURE_FORM_RECOGNIZER_KEY', '')
# Azure App Insights
self.APPINSIGHTS_CONNECTION_STRING = os.getenv(
"APPINSIGHTS_CONNECTION_STRING", ""
'APPINSIGHTS_CONNECTION_STRING', ''
)
# Azure AI Content Safety
self.AZURE_CONTENT_SAFETY_ENDPOINT = os.getenv(
"AZURE_CONTENT_SAFETY_ENDPOINT", ""
'AZURE_CONTENT_SAFETY_ENDPOINT', ''
)
if (
"https" not in self.AZURE_CONTENT_SAFETY_ENDPOINT
and "api.cognitive.microsoft.com" not in self.AZURE_CONTENT_SAFETY_ENDPOINT
):
self.AZURE_CONTENT_SAFETY_ENDPOINT = self.AZURE_FORM_RECOGNIZER_ENDPOINT
self.AZURE_CONTENT_SAFETY_KEY = self.secret_client.get_secret(os.getenv("AZURE_CONTENT_SAFETY_KEY", '')).value if os.getenv("USE_KEY_VAULT", '') else os.getenv('AZURE_CONTENT_SAFETY_KEY', '')
self.AZURE_CONTENT_SAFETY_KEY = self.secret_client.get_secret(os.getenv('AZURE_CONTENT_SAFETY_KEY', '')).value if os.getenv('USE_KEY_VAULT', '') else os.getenv('AZURE_CONTENT_SAFETY_KEY', '')
# Orchestration Settings
self.ORCHESTRATION_STRATEGY = os.getenv(
"ORCHESTRATION_STRATEGY", "openai_function"
'ORCHESTRATION_STRATEGY', 'openai_function'
)

@staticmethod
Expand Down
8 changes: 4 additions & 4 deletions code/backend/batch/utilities/helpers/LLMHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __init__(self):
self.auth_type = env_helper.AZURE_AUTH_TYPE
self.token_provider = env_helper.AZURE_TOKEN_PROVIDER

if self.auth_type == 'rbac':
if self.auth_type == "rbac":
self.openai_client = AzureOpenAI(azure_endpoint=env_helper.OPENAI_API_BASE, api_version=env_helper.AZURE_OPENAI_API_VERSION, azure_ad_token_provider=self.token_provider)
else:
self.openai_client = AzureOpenAI(azure_endpoint=env_helper.OPENAI_API_BASE, api_version=env_helper.AZURE_OPENAI_API_VERSION, api_key=env_helper.OPENAI_API_KEY)
Expand All @@ -21,22 +21,22 @@ def __init__(self):
self.embedding_model = env_helper.AZURE_OPENAI_EMBEDDING_MODEL

def get_llm(self):
if self.auth_type == 'rbac':
if self.auth_type == "rbac":
return AzureChatOpenAI(deployment_name=self.llm_model, temperature=0, max_tokens=self.llm_max_tokens, openai_api_version=self.openai_client._api_version, azure_ad_token_provider=self.token_provider)
else:
return AzureChatOpenAI(deployment_name=self.llm_model, temperature=0, max_tokens=self.llm_max_tokens, openai_api_version=self.openai_client._api_version)

# TODO: This needs to have a custom callback to stream back to the UI
def get_streaming_llm(self):
if self.auth_type == 'rbac':
if self.auth_type == "rbac":
return AzureChatOpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler], deployment_name=self.llm_model, temperature=0,
max_tokens=self.llm_max_tokens, openai_api_version=self.openai_client._api_version, azure_ad_token_provider=self.token_provider)
else:
return AzureChatOpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler], deployment_name=self.llm_model, temperature=0,
max_tokens=self.llm_max_tokens, openai_api_version=self.openai_client._api_version)

def get_embedding_model(self):
if self.auth_type == 'rbac':
if self.auth_type == "rbac":
return AzureOpenAIEmbeddings(azure_deployment=self.embedding_model, chunk_size=1, azure_ad_token_provider=self.token_provider)
else:
return AzureOpenAIEmbeddings(azure_deployment=self.embedding_model, chunk_size=1)
Expand Down
Loading

0 comments on commit 7ac4e5f

Please sign in to comment.